{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "56d53981",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No GPU automatically detected. Setting SETTINGS.GPU to 0, and SETTINGS.NJOBS to cpu_count.\n"
     ]
    }
   ],
   "source": [
    "import cdt\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "33e5859d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed Python's global `random` module once, up front, so the later\n",
    "# random.sample() draw is repeatable under Restart & Run All.\n",
    "SEED = 0\n",
    "random.seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0f680b24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the \"sachs\" dataset through cdt's dataset loader. The cells below\n",
    "# read data[0], which renders as a pandas DataFrame in the outputs.\n",
    "data = cdt.data.load_dataset(\n",
    "    \"sachs\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2e1003b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>26.4</td>\n",
       "      <td>13.20</td>\n",
       "      <td>8.82</td>\n",
       "      <td>18.30</td>\n",
       "      <td>58.80</td>\n",
       "      <td>6.61</td>\n",
       "      <td>17.0</td>\n",
       "      <td>414.0</td>\n",
       "      <td>17.00</td>\n",
       "      <td>44.90</td>\n",
       "      <td>40.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>35.9</td>\n",
       "      <td>16.50</td>\n",
       "      <td>12.30</td>\n",
       "      <td>16.80</td>\n",
       "      <td>8.13</td>\n",
       "      <td>18.60</td>\n",
       "      <td>32.5</td>\n",
       "      <td>352.0</td>\n",
       "      <td>3.37</td>\n",
       "      <td>16.50</td>\n",
       "      <td>61.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>59.4</td>\n",
       "      <td>44.10</td>\n",
       "      <td>14.60</td>\n",
       "      <td>10.20</td>\n",
       "      <td>13.00</td>\n",
       "      <td>14.90</td>\n",
       "      <td>32.5</td>\n",
       "      <td>403.0</td>\n",
       "      <td>11.40</td>\n",
       "      <td>31.90</td>\n",
       "      <td>19.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>73.0</td>\n",
       "      <td>82.80</td>\n",
       "      <td>23.10</td>\n",
       "      <td>13.50</td>\n",
       "      <td>1.29</td>\n",
       "      <td>5.83</td>\n",
       "      <td>11.8</td>\n",
       "      <td>528.0</td>\n",
       "      <td>13.70</td>\n",
       "      <td>28.60</td>\n",
       "      <td>23.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>33.7</td>\n",
       "      <td>19.80</td>\n",
       "      <td>5.19</td>\n",
       "      <td>9.73</td>\n",
       "      <td>24.80</td>\n",
       "      <td>21.10</td>\n",
       "      <td>46.1</td>\n",
       "      <td>305.0</td>\n",
       "      <td>4.66</td>\n",
       "      <td>25.70</td>\n",
       "      <td>81.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7461</th>\n",
       "      <td>49.1</td>\n",
       "      <td>12.40</td>\n",
       "      <td>32.80</td>\n",
       "      <td>27.90</td>\n",
       "      <td>22.70</td>\n",
       "      <td>11.70</td>\n",
       "      <td>38.2</td>\n",
       "      <td>1144.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.55</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7462</th>\n",
       "      <td>23.3</td>\n",
       "      <td>4.61</td>\n",
       "      <td>17.80</td>\n",
       "      <td>22.10</td>\n",
       "      <td>14.90</td>\n",
       "      <td>48.70</td>\n",
       "      <td>67.3</td>\n",
       "      <td>922.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>9.82</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7463</th>\n",
       "      <td>28.1</td>\n",
       "      <td>4.49</td>\n",
       "      <td>18.80</td>\n",
       "      <td>20.20</td>\n",
       "      <td>10.20</td>\n",
       "      <td>3.08</td>\n",
       "      <td>21.9</td>\n",
       "      <td>730.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.75</td>\n",
       "      <td>2.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7464</th>\n",
       "      <td>34.6</td>\n",
       "      <td>7.10</td>\n",
       "      <td>5.73</td>\n",
       "      <td>20.70</td>\n",
       "      <td>15.10</td>\n",
       "      <td>32.20</td>\n",
       "      <td>41.4</td>\n",
       "      <td>813.0</td>\n",
       "      <td>44.50</td>\n",
       "      <td>1382.00</td>\n",
       "      <td>2.44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7465</th>\n",
       "      <td>30.5</td>\n",
       "      <td>1.01</td>\n",
       "      <td>7.30</td>\n",
       "      <td>173.00</td>\n",
       "      <td>22.90</td>\n",
       "      <td>6.61</td>\n",
       "      <td>13.7</td>\n",
       "      <td>890.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.65</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7466 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      praf   pmek   plcg    PIP2   PIP3  p44/42  pakts473     PKA    PKC  \\\n",
       "0     26.4  13.20   8.82   18.30  58.80    6.61      17.0   414.0  17.00   \n",
       "1     35.9  16.50  12.30   16.80   8.13   18.60      32.5   352.0   3.37   \n",
       "2     59.4  44.10  14.60   10.20  13.00   14.90      32.5   403.0  11.40   \n",
       "3     73.0  82.80  23.10   13.50   1.29    5.83      11.8   528.0  13.70   \n",
       "4     33.7  19.80   5.19    9.73  24.80   21.10      46.1   305.0   4.66   \n",
       "...    ...    ...    ...     ...    ...     ...       ...     ...    ...   \n",
       "7461  49.1  12.40  32.80   27.90  22.70   11.70      38.2  1144.0   1.00   \n",
       "7462  23.3   4.61  17.80   22.10  14.90   48.70      67.3   922.0   1.00   \n",
       "7463  28.1   4.49  18.80   20.20  10.20    3.08      21.9   730.0   1.00   \n",
       "7464  34.6   7.10   5.73   20.70  15.10   32.20      41.4   813.0  44.50   \n",
       "7465  30.5   1.01   7.30  173.00  22.90    6.61      13.7   890.0   1.00   \n",
       "\n",
       "          P38   pjnk  \n",
       "0       44.90  40.00  \n",
       "1       16.50  61.50  \n",
       "2       31.90  19.50  \n",
       "3       28.60  23.10  \n",
       "4       25.70  81.30  \n",
       "...       ...    ...  \n",
       "7461     2.55   1.00  \n",
       "7462     9.82   1.00  \n",
       "7463     1.75   2.00  \n",
       "7464  1382.00   2.44  \n",
       "7465     1.00   1.65  \n",
       "\n",
       "[7466 rows x 11 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the measurements table held in data[0]\n",
    "# (pandas shortens the display to the first and last rows).\n",
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d368cd79",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['praf', 'pmek', 'plcg', 'PIP2', 'PIP3', 'p44/42', 'pakts473', 'PKA',\n",
       "       'PKC', 'P38', 'pjnk'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the column labels of the measurements table.\n",
    "data[0].columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9f2a51b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>praf</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.990238</td>\n",
       "      <td>0.241964</td>\n",
       "      <td>0.215247</td>\n",
       "      <td>-0.010558</td>\n",
       "      <td>0.028251</td>\n",
       "      <td>0.273803</td>\n",
       "      <td>-0.147102</td>\n",
       "      <td>0.194045</td>\n",
       "      <td>0.218698</td>\n",
       "      <td>0.192689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pmek</th>\n",
       "      <td>0.990238</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.271103</td>\n",
       "      <td>0.241443</td>\n",
       "      <td>-0.017135</td>\n",
       "      <td>0.039611</td>\n",
       "      <td>0.309448</td>\n",
       "      <td>-0.159994</td>\n",
       "      <td>0.218725</td>\n",
       "      <td>0.244555</td>\n",
       "      <td>0.221409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>plcg</th>\n",
       "      <td>0.241964</td>\n",
       "      <td>0.271103</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.926233</td>\n",
       "      <td>0.086955</td>\n",
       "      <td>0.153027</td>\n",
       "      <td>0.442973</td>\n",
       "      <td>-0.193252</td>\n",
       "      <td>0.355486</td>\n",
       "      <td>0.377967</td>\n",
       "      <td>0.410240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PIP2</th>\n",
       "      <td>0.215247</td>\n",
       "      <td>0.241443</td>\n",
       "      <td>0.926233</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.195057</td>\n",
       "      <td>0.140663</td>\n",
       "      <td>0.408849</td>\n",
       "      <td>-0.177130</td>\n",
       "      <td>0.337047</td>\n",
       "      <td>0.356615</td>\n",
       "      <td>0.382692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PIP3</th>\n",
       "      <td>-0.010558</td>\n",
       "      <td>-0.017135</td>\n",
       "      <td>0.086955</td>\n",
       "      <td>0.195057</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.032356</td>\n",
       "      <td>-0.049051</td>\n",
       "      <td>0.004952</td>\n",
       "      <td>-0.032547</td>\n",
       "      <td>-0.030729</td>\n",
       "      <td>-0.040550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>p44/42</th>\n",
       "      <td>0.028251</td>\n",
       "      <td>0.039611</td>\n",
       "      <td>0.153027</td>\n",
       "      <td>0.140663</td>\n",
       "      <td>-0.032356</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.687325</td>\n",
       "      <td>0.221461</td>\n",
       "      <td>0.131469</td>\n",
       "      <td>0.135651</td>\n",
       "      <td>0.156159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pakts473</th>\n",
       "      <td>0.273803</td>\n",
       "      <td>0.309448</td>\n",
       "      <td>0.442973</td>\n",
       "      <td>0.408849</td>\n",
       "      <td>-0.049051</td>\n",
       "      <td>0.687325</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.012467</td>\n",
       "      <td>0.379351</td>\n",
       "      <td>0.401112</td>\n",
       "      <td>0.440162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PKA</th>\n",
       "      <td>-0.147102</td>\n",
       "      <td>-0.159994</td>\n",
       "      <td>-0.193252</td>\n",
       "      <td>-0.177130</td>\n",
       "      <td>0.004952</td>\n",
       "      <td>0.221461</td>\n",
       "      <td>-0.012467</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.159382</td>\n",
       "      <td>-0.181486</td>\n",
       "      <td>-0.178506</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PKC</th>\n",
       "      <td>0.194045</td>\n",
       "      <td>0.218725</td>\n",
       "      <td>0.355486</td>\n",
       "      <td>0.337047</td>\n",
       "      <td>-0.032547</td>\n",
       "      <td>0.131469</td>\n",
       "      <td>0.379351</td>\n",
       "      <td>-0.159382</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.958921</td>\n",
       "      <td>0.813999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>P38</th>\n",
       "      <td>0.218698</td>\n",
       "      <td>0.244555</td>\n",
       "      <td>0.377967</td>\n",
       "      <td>0.356615</td>\n",
       "      <td>-0.030729</td>\n",
       "      <td>0.135651</td>\n",
       "      <td>0.401112</td>\n",
       "      <td>-0.181486</td>\n",
       "      <td>0.958921</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.798476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pjnk</th>\n",
       "      <td>0.192689</td>\n",
       "      <td>0.221409</td>\n",
       "      <td>0.410240</td>\n",
       "      <td>0.382692</td>\n",
       "      <td>-0.040550</td>\n",
       "      <td>0.156159</td>\n",
       "      <td>0.440162</td>\n",
       "      <td>-0.178506</td>\n",
       "      <td>0.813999</td>\n",
       "      <td>0.798476</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              praf      pmek      plcg      PIP2      PIP3    p44/42  \\\n",
       "praf      1.000000  0.990238  0.241964  0.215247 -0.010558  0.028251   \n",
       "pmek      0.990238  1.000000  0.271103  0.241443 -0.017135  0.039611   \n",
       "plcg      0.241964  0.271103  1.000000  0.926233  0.086955  0.153027   \n",
       "PIP2      0.215247  0.241443  0.926233  1.000000  0.195057  0.140663   \n",
       "PIP3     -0.010558 -0.017135  0.086955  0.195057  1.000000 -0.032356   \n",
       "p44/42    0.028251  0.039611  0.153027  0.140663 -0.032356  1.000000   \n",
       "pakts473  0.273803  0.309448  0.442973  0.408849 -0.049051  0.687325   \n",
       "PKA      -0.147102 -0.159994 -0.193252 -0.177130  0.004952  0.221461   \n",
       "PKC       0.194045  0.218725  0.355486  0.337047 -0.032547  0.131469   \n",
       "P38       0.218698  0.244555  0.377967  0.356615 -0.030729  0.135651   \n",
       "pjnk      0.192689  0.221409  0.410240  0.382692 -0.040550  0.156159   \n",
       "\n",
       "          pakts473       PKA       PKC       P38      pjnk  \n",
       "praf      0.273803 -0.147102  0.194045  0.218698  0.192689  \n",
       "pmek      0.309448 -0.159994  0.218725  0.244555  0.221409  \n",
       "plcg      0.442973 -0.193252  0.355486  0.377967  0.410240  \n",
       "PIP2      0.408849 -0.177130  0.337047  0.356615  0.382692  \n",
       "PIP3     -0.049051  0.004952 -0.032547 -0.030729 -0.040550  \n",
       "p44/42    0.687325  0.221461  0.131469  0.135651  0.156159  \n",
       "pakts473  1.000000 -0.012467  0.379351  0.401112  0.440162  \n",
       "PKA      -0.012467  1.000000 -0.159382 -0.181486 -0.178506  \n",
       "PKC       0.379351 -0.159382  1.000000  0.958921  0.813999  \n",
       "P38       0.401112 -0.181486  0.958921  1.000000  0.798476  \n",
       "pjnk      0.440162 -0.178506  0.813999  0.798476  1.000000  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pairwise correlation between every pair of columns. \"pearson\" is pandas'\n",
    "# default method, spelled out here so the choice is visible.\n",
    "data[0].corr(method=\"pearson\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "85c747a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>124.071930</td>\n",
       "      <td>145.380962</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>247.528092</td>\n",
       "      <td>377.056214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>30.800000</td>\n",
       "      <td>16.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>53.800000</td>\n",
       "      <td>26.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>103.000000</td>\n",
       "      <td>64.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4614.000000</td>\n",
       "      <td>7105.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              praf         pmek\n",
       "count  7466.000000  7466.000000\n",
       "mean    124.071930   145.380962\n",
       "std     247.528092   377.056214\n",
       "min       1.000000     1.000000\n",
       "25%      30.800000    16.500000\n",
       "50%      53.800000    26.700000\n",
       "75%     103.000000    64.400000\n",
       "max    4614.000000  7105.000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for praf and pmek, the pair with the highest\n",
    "# off-diagonal correlation in the matrix above (about 0.99).\n",
    "data[0].loc[:, [\"praf\", \"pmek\"]].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "165d396f",
   "metadata": {},
   "source": [
    "## Mask P38 Based on PKC (Prediction target P38)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "18c3b321",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>30.341658</td>\n",
       "      <td>135.014504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>92.870039</td>\n",
       "      <td>494.768842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>4.460000</td>\n",
       "      <td>19.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>12.700000</td>\n",
       "      <td>30.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>23.500000</td>\n",
       "      <td>49.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1611.000000</td>\n",
       "      <td>7499.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               PKC          P38\n",
       "count  7466.000000  7466.000000\n",
       "mean     30.341658   135.014504\n",
       "std      92.870039   494.768842\n",
       "min       1.000000     1.000000\n",
       "25%       4.460000    19.300000\n",
       "50%      12.700000    30.500000\n",
       "75%      23.500000    49.600000\n",
       "max    1611.000000  7499.000000"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for PKC and P38. The 75% row for PKC (23.5) matches\n",
    "# the cut-off used by the filter cells that follow.\n",
    "data[0].loc[:, [\"PKC\", \"P38\"]].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2e5a3f68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>104.00</td>\n",
       "      <td>61.50</td>\n",
       "      <td>10.60</td>\n",
       "      <td>21.1</td>\n",
       "      <td>41.8</td>\n",
       "      <td>11.50</td>\n",
       "      <td>23.5</td>\n",
       "      <td>445.0</td>\n",
       "      <td>29.2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>25.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>74.30</td>\n",
       "      <td>22.90</td>\n",
       "      <td>7.50</td>\n",
       "      <td>15.5</td>\n",
       "      <td>26.2</td>\n",
       "      <td>20.90</td>\n",
       "      <td>36.5</td>\n",
       "      <td>389.0</td>\n",
       "      <td>31.9</td>\n",
       "      <td>71.0</td>\n",
       "      <td>35.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>50.00</td>\n",
       "      <td>13.80</td>\n",
       "      <td>11.90</td>\n",
       "      <td>13.2</td>\n",
       "      <td>11.3</td>\n",
       "      <td>18.10</td>\n",
       "      <td>27.9</td>\n",
       "      <td>392.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>77.0</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>26.20</td>\n",
       "      <td>26.70</td>\n",
       "      <td>21.30</td>\n",
       "      <td>10.9</td>\n",
       "      <td>14.7</td>\n",
       "      <td>9.06</td>\n",
       "      <td>37.9</td>\n",
       "      <td>89.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>65.5</td>\n",
       "      <td>1.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>30.50</td>\n",
       "      <td>19.80</td>\n",
       "      <td>7.50</td>\n",
       "      <td>133.0</td>\n",
       "      <td>15.7</td>\n",
       "      <td>19.10</td>\n",
       "      <td>36.2</td>\n",
       "      <td>319.0</td>\n",
       "      <td>24.1</td>\n",
       "      <td>37.2</td>\n",
       "      <td>17.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7114</th>\n",
       "      <td>2.23</td>\n",
       "      <td>2.71</td>\n",
       "      <td>10.20</td>\n",
       "      <td>40.7</td>\n",
       "      <td>50.0</td>\n",
       "      <td>21.90</td>\n",
       "      <td>29.2</td>\n",
       "      <td>403.0</td>\n",
       "      <td>25.7</td>\n",
       "      <td>505.0</td>\n",
       "      <td>9.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7253</th>\n",
       "      <td>34.90</td>\n",
       "      <td>3.02</td>\n",
       "      <td>30.80</td>\n",
       "      <td>24.1</td>\n",
       "      <td>20.4</td>\n",
       "      <td>18.80</td>\n",
       "      <td>19.5</td>\n",
       "      <td>239.0</td>\n",
       "      <td>39.2</td>\n",
       "      <td>28.4</td>\n",
       "      <td>7.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7298</th>\n",
       "      <td>58.30</td>\n",
       "      <td>10.40</td>\n",
       "      <td>28.60</td>\n",
       "      <td>25.7</td>\n",
       "      <td>14.2</td>\n",
       "      <td>29.70</td>\n",
       "      <td>27.6</td>\n",
       "      <td>237.0</td>\n",
       "      <td>24.1</td>\n",
       "      <td>2017.0</td>\n",
       "      <td>1.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7337</th>\n",
       "      <td>58.80</td>\n",
       "      <td>1.81</td>\n",
       "      <td>12.70</td>\n",
       "      <td>231.0</td>\n",
       "      <td>35.2</td>\n",
       "      <td>9.31</td>\n",
       "      <td>13.7</td>\n",
       "      <td>302.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>2525.0</td>\n",
       "      <td>12.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7464</th>\n",
       "      <td>34.60</td>\n",
       "      <td>7.10</td>\n",
       "      <td>5.73</td>\n",
       "      <td>20.7</td>\n",
       "      <td>15.1</td>\n",
       "      <td>32.20</td>\n",
       "      <td>41.4</td>\n",
       "      <td>813.0</td>\n",
       "      <td>44.5</td>\n",
       "      <td>1382.0</td>\n",
       "      <td>2.44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1851 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        praf   pmek   plcg   PIP2  PIP3  p44/42  pakts473    PKA   PKC  \\\n",
       "8     104.00  61.50  10.60   21.1  41.8   11.50      23.5  445.0  29.2   \n",
       "11     74.30  22.90   7.50   15.5  26.2   20.90      36.5  389.0  31.9   \n",
       "15     50.00  13.80  11.90   13.2  11.3   18.10      27.9  392.0  56.2   \n",
       "16     26.20  26.70  21.30   10.9  14.7    9.06      37.9   89.0  40.0   \n",
       "18     30.50  19.80   7.50  133.0  15.7   19.10      36.2  319.0  24.1   \n",
       "...      ...    ...    ...    ...   ...     ...       ...    ...   ...   \n",
       "7114    2.23   2.71  10.20   40.7  50.0   21.90      29.2  403.0  25.7   \n",
       "7253   34.90   3.02  30.80   24.1  20.4   18.80      19.5  239.0  39.2   \n",
       "7298   58.30  10.40  28.60   25.7  14.2   29.70      27.6  237.0  24.1   \n",
       "7337   58.80   1.81  12.70  231.0  35.2    9.31      13.7  302.0  63.2   \n",
       "7464   34.60   7.10   5.73   20.7  15.1   32.20      41.4  813.0  44.5   \n",
       "\n",
       "         P38   pjnk  \n",
       "8       61.0  25.30  \n",
       "11      71.0  35.50  \n",
       "15      77.0   1.00  \n",
       "16      65.5   1.42  \n",
       "18      37.2  17.20  \n",
       "...      ...    ...  \n",
       "7114   505.0   9.22  \n",
       "7253    28.4   7.99  \n",
       "7298  2017.0   1.22  \n",
       "7337  2525.0  12.60  \n",
       "7464  1382.0   2.44  \n",
       "\n",
       "[1851 rows x 11 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose PKC value is strictly above 23.5, the 75th-percentile value\n",
    "# that describe() reports for PKC.\n",
    "data[0][data[0][\"PKC\"].gt(23.5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1fd68208",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the upper-PKC subset (PKC strictly greater than 23.5, the\n",
    "# 75th-percentile value from describe()) for the sampling step.\n",
    "PKC_q = data[0][data[0][\"PKC\"].gt(23.5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "288a5bbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw 90% (rounded down) of PKC_q's row positions, 0 .. len(PKC_q) - 1,\n",
    "# without replacement. This consumes the global `random` state, so the\n",
    "# result depends on the seed set near the top of the notebook.\n",
    "PKC_s_idx = random.sample(\n",
    "    range(len(PKC_q)),\n",
    "    int(len(PKC_q) * 0.9),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9f856f11",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the sampled positions in ascending order. sorted() builds a new list\n",
    "# and rebinds the name, so re-running this cell is harmless.\n",
    "PKC_s_idx = sorted(PKC_s_idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "853bb6c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>104.00</td>\n",
       "      <td>61.50</td>\n",
       "      <td>10.60</td>\n",
       "      <td>21.1</td>\n",
       "      <td>41.8</td>\n",
       "      <td>11.50</td>\n",
       "      <td>23.5</td>\n",
       "      <td>445.0</td>\n",
       "      <td>29.2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>25.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>74.30</td>\n",
       "      <td>22.90</td>\n",
       "      <td>7.50</td>\n",
       "      <td>15.5</td>\n",
       "      <td>26.2</td>\n",
       "      <td>20.90</td>\n",
       "      <td>36.5</td>\n",
       "      <td>389.0</td>\n",
       "      <td>31.9</td>\n",
       "      <td>71.0</td>\n",
       "      <td>35.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>50.00</td>\n",
       "      <td>13.80</td>\n",
       "      <td>11.90</td>\n",
       "      <td>13.2</td>\n",
       "      <td>11.3</td>\n",
       "      <td>18.10</td>\n",
       "      <td>27.9</td>\n",
       "      <td>392.0</td>\n",
       "      <td>56.2</td>\n",
       "      <td>77.0</td>\n",
       "      <td>1.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>26.20</td>\n",
       "      <td>26.70</td>\n",
       "      <td>21.30</td>\n",
       "      <td>10.9</td>\n",
       "      <td>14.7</td>\n",
       "      <td>9.06</td>\n",
       "      <td>37.9</td>\n",
       "      <td>89.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>65.5</td>\n",
       "      <td>1.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>30.50</td>\n",
       "      <td>19.80</td>\n",
       "      <td>7.50</td>\n",
       "      <td>133.0</td>\n",
       "      <td>15.7</td>\n",
       "      <td>19.10</td>\n",
       "      <td>36.2</td>\n",
       "      <td>319.0</td>\n",
       "      <td>24.1</td>\n",
       "      <td>37.2</td>\n",
       "      <td>17.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7114</th>\n",
       "      <td>2.23</td>\n",
       "      <td>2.71</td>\n",
       "      <td>10.20</td>\n",
       "      <td>40.7</td>\n",
       "      <td>50.0</td>\n",
       "      <td>21.90</td>\n",
       "      <td>29.2</td>\n",
       "      <td>403.0</td>\n",
       "      <td>25.7</td>\n",
       "      <td>505.0</td>\n",
       "      <td>9.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7253</th>\n",
       "      <td>34.90</td>\n",
       "      <td>3.02</td>\n",
       "      <td>30.80</td>\n",
       "      <td>24.1</td>\n",
       "      <td>20.4</td>\n",
       "      <td>18.80</td>\n",
       "      <td>19.5</td>\n",
       "      <td>239.0</td>\n",
       "      <td>39.2</td>\n",
       "      <td>28.4</td>\n",
       "      <td>7.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7298</th>\n",
       "      <td>58.30</td>\n",
       "      <td>10.40</td>\n",
       "      <td>28.60</td>\n",
       "      <td>25.7</td>\n",
       "      <td>14.2</td>\n",
       "      <td>29.70</td>\n",
       "      <td>27.6</td>\n",
       "      <td>237.0</td>\n",
       "      <td>24.1</td>\n",
       "      <td>2017.0</td>\n",
       "      <td>1.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7337</th>\n",
       "      <td>58.80</td>\n",
       "      <td>1.81</td>\n",
       "      <td>12.70</td>\n",
       "      <td>231.0</td>\n",
       "      <td>35.2</td>\n",
       "      <td>9.31</td>\n",
       "      <td>13.7</td>\n",
       "      <td>302.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>2525.0</td>\n",
       "      <td>12.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7464</th>\n",
       "      <td>34.60</td>\n",
       "      <td>7.10</td>\n",
       "      <td>5.73</td>\n",
       "      <td>20.7</td>\n",
       "      <td>15.1</td>\n",
       "      <td>32.20</td>\n",
       "      <td>41.4</td>\n",
       "      <td>813.0</td>\n",
       "      <td>44.5</td>\n",
       "      <td>1382.0</td>\n",
       "      <td>2.44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1851 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        praf   pmek   plcg   PIP2  PIP3  p44/42  pakts473    PKA   PKC  \\\n",
       "8     104.00  61.50  10.60   21.1  41.8   11.50      23.5  445.0  29.2   \n",
       "11     74.30  22.90   7.50   15.5  26.2   20.90      36.5  389.0  31.9   \n",
       "15     50.00  13.80  11.90   13.2  11.3   18.10      27.9  392.0  56.2   \n",
       "16     26.20  26.70  21.30   10.9  14.7    9.06      37.9   89.0  40.0   \n",
       "18     30.50  19.80   7.50  133.0  15.7   19.10      36.2  319.0  24.1   \n",
       "...      ...    ...    ...    ...   ...     ...       ...    ...   ...   \n",
       "7114    2.23   2.71  10.20   40.7  50.0   21.90      29.2  403.0  25.7   \n",
       "7253   34.90   3.02  30.80   24.1  20.4   18.80      19.5  239.0  39.2   \n",
       "7298   58.30  10.40  28.60   25.7  14.2   29.70      27.6  237.0  24.1   \n",
       "7337   58.80   1.81  12.70  231.0  35.2    9.31      13.7  302.0  63.2   \n",
       "7464   34.60   7.10   5.73   20.7  15.1   32.20      41.4  813.0  44.5   \n",
       "\n",
       "         P38   pjnk  \n",
       "8       61.0  25.30  \n",
       "11      71.0  35.50  \n",
       "15      77.0   1.00  \n",
       "16      65.5   1.42  \n",
       "18      37.2  17.20  \n",
       "...      ...    ...  \n",
       "7114   505.0   9.22  \n",
       "7253    28.4   7.99  \n",
       "7298  2017.0   1.22  \n",
       "7337  2525.0  12.60  \n",
       "7464  1382.0   2.44  \n",
       "\n",
       "[1851 rows x 11 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "PKC_q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "72e618f3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.899513776337115"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(PKC_s_idx)/len(PKC_q)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "94669a20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Translate the sampled positions within PKC_q into the corresponding row labels.\n",
    "test_idxs = PKC_q.index.take(PKC_s_idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "504c357c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# test_idxs holds row labels (taken from PKC_q.index), so select with label-based .loc.\n",
    "# Positional .iloc only returns the same rows while data[0] has a default 0..n-1 index.\n",
    "test_df = data[0].loc[test_idxs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "1fad4910",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The training set is every row of the full dataset whose label is not in test_idxs.\n",
    "train_df = data[0].drop(labels=test_idxs, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "c5669c74",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "      <td>7466.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>124.071930</td>\n",
       "      <td>145.380962</td>\n",
       "      <td>54.853643</td>\n",
       "      <td>151.120738</td>\n",
       "      <td>27.034962</td>\n",
       "      <td>26.631193</td>\n",
       "      <td>81.167214</td>\n",
       "      <td>625.758588</td>\n",
       "      <td>30.341658</td>\n",
       "      <td>135.014504</td>\n",
       "      <td>73.267503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>247.528092</td>\n",
       "      <td>377.056214</td>\n",
       "      <td>173.859778</td>\n",
       "      <td>299.347505</td>\n",
       "      <td>43.048160</td>\n",
       "      <td>45.826718</td>\n",
       "      <td>137.766206</td>\n",
       "      <td>644.459352</td>\n",
       "      <td>92.870039</td>\n",
       "      <td>494.768842</td>\n",
       "      <td>215.660634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>30.800000</td>\n",
       "      <td>16.500000</td>\n",
       "      <td>9.410000</td>\n",
       "      <td>18.300000</td>\n",
       "      <td>9.560000</td>\n",
       "      <td>8.510000</td>\n",
       "      <td>23.300000</td>\n",
       "      <td>276.000000</td>\n",
       "      <td>4.460000</td>\n",
       "      <td>19.300000</td>\n",
       "      <td>8.007500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>53.800000</td>\n",
       "      <td>26.700000</td>\n",
       "      <td>16.500000</td>\n",
       "      <td>52.800000</td>\n",
       "      <td>17.800000</td>\n",
       "      <td>17.200000</td>\n",
       "      <td>37.200000</td>\n",
       "      <td>449.000000</td>\n",
       "      <td>12.700000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>18.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>103.000000</td>\n",
       "      <td>64.400000</td>\n",
       "      <td>27.100000</td>\n",
       "      <td>172.000000</td>\n",
       "      <td>32.800000</td>\n",
       "      <td>32.200000</td>\n",
       "      <td>72.300000</td>\n",
       "      <td>750.000000</td>\n",
       "      <td>23.500000</td>\n",
       "      <td>49.600000</td>\n",
       "      <td>52.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4614.000000</td>\n",
       "      <td>7105.000000</td>\n",
       "      <td>6208.000000</td>\n",
       "      <td>9058.000000</td>\n",
       "      <td>1275.000000</td>\n",
       "      <td>2571.000000</td>\n",
       "      <td>3555.000000</td>\n",
       "      <td>8896.000000</td>\n",
       "      <td>1611.000000</td>\n",
       "      <td>7499.000000</td>\n",
       "      <td>4740.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              praf         pmek         plcg         PIP2         PIP3  \\\n",
       "count  7466.000000  7466.000000  7466.000000  7466.000000  7466.000000   \n",
       "mean    124.071930   145.380962    54.853643   151.120738    27.034962   \n",
       "std     247.528092   377.056214   173.859778   299.347505    43.048160   \n",
       "min       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "25%      30.800000    16.500000     9.410000    18.300000     9.560000   \n",
       "50%      53.800000    26.700000    16.500000    52.800000    17.800000   \n",
       "75%     103.000000    64.400000    27.100000   172.000000    32.800000   \n",
       "max    4614.000000  7105.000000  6208.000000  9058.000000  1275.000000   \n",
       "\n",
       "            p44/42     pakts473          PKA          PKC          P38  \\\n",
       "count  7466.000000  7466.000000  7466.000000  7466.000000  7466.000000   \n",
       "mean     26.631193    81.167214   625.758588    30.341658   135.014504   \n",
       "std      45.826718   137.766206   644.459352    92.870039   494.768842   \n",
       "min       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "25%       8.510000    23.300000   276.000000     4.460000    19.300000   \n",
       "50%      17.200000    37.200000   449.000000    12.700000    30.500000   \n",
       "75%      32.200000    72.300000   750.000000    23.500000    49.600000   \n",
       "max    2571.000000  3555.000000  8896.000000  1611.000000  7499.000000   \n",
       "\n",
       "              pjnk  \n",
       "count  7466.000000  \n",
       "mean     73.267503  \n",
       "std     215.660634  \n",
       "min       1.000000  \n",
       "25%       8.007500  \n",
       "50%      18.400000  \n",
       "75%      52.800000  \n",
       "max    4740.000000  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "342875b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "      <td>5801.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>105.095897</td>\n",
       "      <td>112.033079</td>\n",
       "      <td>24.946128</td>\n",
       "      <td>103.324566</td>\n",
       "      <td>27.956097</td>\n",
       "      <td>23.582529</td>\n",
       "      <td>55.520821</td>\n",
       "      <td>677.158767</td>\n",
       "      <td>12.140979</td>\n",
       "      <td>49.077606</td>\n",
       "      <td>34.386683</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>217.340108</td>\n",
       "      <td>325.497560</td>\n",
       "      <td>63.393769</td>\n",
       "      <td>155.557535</td>\n",
       "      <td>45.228868</td>\n",
       "      <td>48.178200</td>\n",
       "      <td>87.710222</td>\n",
       "      <td>657.695312</td>\n",
       "      <td>37.680995</td>\n",
       "      <td>227.572738</td>\n",
       "      <td>96.995886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>29.200000</td>\n",
       "      <td>15.100000</td>\n",
       "      <td>9.310000</td>\n",
       "      <td>17.800000</td>\n",
       "      <td>10.200000</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>22.100000</td>\n",
       "      <td>313.000000</td>\n",
       "      <td>2.920000</td>\n",
       "      <td>16.700000</td>\n",
       "      <td>6.260000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>49.100000</td>\n",
       "      <td>24.800000</td>\n",
       "      <td>15.700000</td>\n",
       "      <td>42.200000</td>\n",
       "      <td>18.600000</td>\n",
       "      <td>15.100000</td>\n",
       "      <td>34.300000</td>\n",
       "      <td>474.000000</td>\n",
       "      <td>9.650000</td>\n",
       "      <td>25.500000</td>\n",
       "      <td>14.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>87.400000</td>\n",
       "      <td>47.000000</td>\n",
       "      <td>23.900000</td>\n",
       "      <td>139.000000</td>\n",
       "      <td>33.700000</td>\n",
       "      <td>27.400000</td>\n",
       "      <td>58.800000</td>\n",
       "      <td>799.000000</td>\n",
       "      <td>15.700000</td>\n",
       "      <td>35.900000</td>\n",
       "      <td>37.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3820.000000</td>\n",
       "      <td>5829.000000</td>\n",
       "      <td>1843.000000</td>\n",
       "      <td>2943.000000</td>\n",
       "      <td>1275.000000</td>\n",
       "      <td>2571.000000</td>\n",
       "      <td>3555.000000</td>\n",
       "      <td>8896.000000</td>\n",
       "      <td>1611.000000</td>\n",
       "      <td>7499.000000</td>\n",
       "      <td>2864.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              praf         pmek         plcg         PIP2         PIP3  \\\n",
       "count  5801.000000  5801.000000  5801.000000  5801.000000  5801.000000   \n",
       "mean    105.095897   112.033079    24.946128   103.324566    27.956097   \n",
       "std     217.340108   325.497560    63.393769   155.557535    45.228868   \n",
       "min       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "25%      29.200000    15.100000     9.310000    17.800000    10.200000   \n",
       "50%      49.100000    24.800000    15.700000    42.200000    18.600000   \n",
       "75%      87.400000    47.000000    23.900000   139.000000    33.700000   \n",
       "max    3820.000000  5829.000000  1843.000000  2943.000000  1275.000000   \n",
       "\n",
       "            p44/42     pakts473          PKA          PKC          P38  \\\n",
       "count  5801.000000  5801.000000  5801.000000  5801.000000  5801.000000   \n",
       "mean     23.582529    55.520821   677.158767    12.140979    49.077606   \n",
       "std      48.178200    87.710222   657.695312    37.680995   227.572738   \n",
       "min       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "25%       7.500000    22.100000   313.000000     2.920000    16.700000   \n",
       "50%      15.100000    34.300000   474.000000     9.650000    25.500000   \n",
       "75%      27.400000    58.800000   799.000000    15.700000    35.900000   \n",
       "max    2571.000000  3555.000000  8896.000000  1611.000000  7499.000000   \n",
       "\n",
       "              pjnk  \n",
       "count  5801.000000  \n",
       "mean     34.386683  \n",
       "std      96.995886  \n",
       "min       1.000000  \n",
       "25%       6.260000  \n",
       "50%      14.300000  \n",
       "75%      37.200000  \n",
       "max    2864.000000  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "a737b8af",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>praf</th>\n",
       "      <th>pmek</th>\n",
       "      <th>plcg</th>\n",
       "      <th>PIP2</th>\n",
       "      <th>PIP3</th>\n",
       "      <th>p44/42</th>\n",
       "      <th>pakts473</th>\n",
       "      <th>PKA</th>\n",
       "      <th>PKC</th>\n",
       "      <th>P38</th>\n",
       "      <th>pjnk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "      <td>1665.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>190.186024</td>\n",
       "      <td>261.567790</td>\n",
       "      <td>159.053940</td>\n",
       "      <td>317.646619</td>\n",
       "      <td>23.825652</td>\n",
       "      <td>37.252997</td>\n",
       "      <td>170.521405</td>\n",
       "      <td>446.676042</td>\n",
       "      <td>93.754354</td>\n",
       "      <td>434.425886</td>\n",
       "      <td>208.731550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>323.407719</td>\n",
       "      <td>501.128252</td>\n",
       "      <td>328.046479</td>\n",
       "      <td>530.985382</td>\n",
       "      <td>34.205234</td>\n",
       "      <td>34.432080</td>\n",
       "      <td>219.200238</td>\n",
       "      <td>560.537569</td>\n",
       "      <td>169.012214</td>\n",
       "      <td>895.676597</td>\n",
       "      <td>390.160563</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.010000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>23.700000</td>\n",
       "      <td>10.200000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>37.500000</td>\n",
       "      <td>21.500000</td>\n",
       "      <td>10.300000</td>\n",
       "      <td>21.300000</td>\n",
       "      <td>7.700000</td>\n",
       "      <td>13.500000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>13.800000</td>\n",
       "      <td>28.600000</td>\n",
       "      <td>49.600000</td>\n",
       "      <td>19.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>79.900000</td>\n",
       "      <td>40.300000</td>\n",
       "      <td>22.900000</td>\n",
       "      <td>142.000000</td>\n",
       "      <td>14.700000</td>\n",
       "      <td>26.400000</td>\n",
       "      <td>61.500000</td>\n",
       "      <td>325.000000</td>\n",
       "      <td>38.500000</td>\n",
       "      <td>80.600000</td>\n",
       "      <td>69.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>209.000000</td>\n",
       "      <td>325.000000</td>\n",
       "      <td>193.000000</td>\n",
       "      <td>392.000000</td>\n",
       "      <td>28.400000</td>\n",
       "      <td>50.500000</td>\n",
       "      <td>239.000000</td>\n",
       "      <td>599.000000</td>\n",
       "      <td>73.000000</td>\n",
       "      <td>400.000000</td>\n",
       "      <td>209.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4614.000000</td>\n",
       "      <td>7105.000000</td>\n",
       "      <td>6208.000000</td>\n",
       "      <td>9058.000000</td>\n",
       "      <td>764.000000</td>\n",
       "      <td>392.000000</td>\n",
       "      <td>1499.000000</td>\n",
       "      <td>4698.000000</td>\n",
       "      <td>1358.000000</td>\n",
       "      <td>6916.000000</td>\n",
       "      <td>4740.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              praf         pmek         plcg         PIP2         PIP3  \\\n",
       "count  1665.000000  1665.000000  1665.000000  1665.000000  1665.000000   \n",
       "mean    190.186024   261.567790   159.053940   317.646619    23.825652   \n",
       "std     323.407719   501.128252   328.046479   530.985382    34.205234   \n",
       "min       1.000000     1.000000     1.000000     1.000000     1.000000   \n",
       "25%      37.500000    21.500000    10.300000    21.300000     7.700000   \n",
       "50%      79.900000    40.300000    22.900000   142.000000    14.700000   \n",
       "75%     209.000000   325.000000   193.000000   392.000000    28.400000   \n",
       "max    4614.000000  7105.000000  6208.000000  9058.000000   764.000000   \n",
       "\n",
       "            p44/42     pakts473          PKA          PKC          P38  \\\n",
       "count  1665.000000  1665.000000  1665.000000  1665.000000  1665.000000   \n",
       "mean     37.252997   170.521405   446.676042    93.754354   434.425886   \n",
       "std      34.432080   219.200238   560.537569   169.012214   895.676597   \n",
       "min       1.000000     1.010000     1.000000    23.700000    10.200000   \n",
       "25%      13.500000    30.000000    13.800000    28.600000    49.600000   \n",
       "50%      26.400000    61.500000   325.000000    38.500000    80.600000   \n",
       "75%      50.500000   239.000000   599.000000    73.000000   400.000000   \n",
       "max     392.000000  1499.000000  4698.000000  1358.000000  6916.000000   \n",
       "\n",
       "              pjnk  \n",
       "count  1665.000000  \n",
       "mean    208.731550  \n",
       "std     390.160563  \n",
       "min       1.000000  \n",
       "25%      19.300000  \n",
       "50%      69.200000  \n",
       "75%     209.000000  \n",
       "max    4740.000000  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "4d5e287c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the training split to disk; index=False leaves the row labels out of the file.\n",
    "train_df.to_csv(path_or_buf=\"sachs_train.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "ad28deb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the test split to disk; index=False leaves the row labels out of the file.\n",
    "test_df.to_csv(path_or_buf=\"sachs_test.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "e765b207",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "idx 0, column: praf\n",
      "idx 1, column: pmek\n",
      "idx 2, column: plcg\n",
      "idx 3, column: PIP2\n",
      "idx 4, column: PIP3\n",
      "idx 5, column: p44/42\n",
      "idx 6, column: pakts473\n",
      "idx 7, column: PKA\n",
      "idx 8, column: PKC\n",
      "idx 9, column: P38\n",
      "idx 10, column: pjnk\n"
     ]
    }
   ],
   "source": [
    "# Print each column name of the training frame next to its positional index.\n",
    "for position, column_name in enumerate(train_df.columns):\n",
    "    print(f\"idx {position}, column: {column_name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef68628c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tabular-ml-eval",
   "language": "python",
   "name": "tabular-ml-eval"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
