{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7c6de070-5888-4c35-9110-356afd351d4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List, Tuple, Dict\n",
    "from copy import copy\n",
    "\n",
    "from tqdm import tqdm_notebook\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "efeeac40-cf61-4291-8351-ac06adac5ef8",
   "metadata": {},
   "outputs": [],
   "source": [
    "b_test = pd.read_csv('b_train.csv',header=None)\n",
    "c_test = pd.read_csv('c_train.csv',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6124ffc4-fb18-4410-805a-7f42b7b4b40e",
   "metadata": {},
   "outputs": [],
   "source": [
    "c_test = c_test.transpose() \n",
    "b_test = b_test.transpose() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "80e9e640-cd72-4899-a66c-2d7207781ee0",
   "metadata": {},
   "outputs": [],
   "source": [
    "c_test = c_test.values\n",
    "b_test = b_test.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4ebad5b4-a6c6-4b57-8be8-2a4522471518",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((11, 96000), (11, 96000))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c_test.shape, b_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5f68f2f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "96000 / 8000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "daee0e57-a184-4d4e-9e1d-db3b7eee139c",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_backdoor = b_test[:-1, :]\n",
    "test_clean = c_test[:-1, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "402168c0-f4f2-4d4c-86ae-4b616fb84cab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((10, 96000), (10, 96000))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_backdoor.shape, test_clean.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4000.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "96000 / 24"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cbfdf24c-c7d5-4bdf-a804-8016a43e8245",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(24, 40000)\n"
     ]
    }
   ],
   "source": [
    "sub_matrices = np.split(test_clean, 24, axis=1)\n",
    "\n",
    "# 将子矩阵展开成 15 个 (1, 40000) 的矩阵\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# 合并展开后的矩阵为一个形状为 (15, 40000) 的矩阵\n",
    "test_c = np.concatenate(flat_matrices)\n",
    "\n",
    "print(test_c.shape)  # 输出 (15, 40000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "37bcd0f7-c976-4264-9bd7-a7667524e43b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(24, 40000)\n"
     ]
    }
   ],
   "source": [
    "sub_matrices = np.split(test_backdoor, 24, axis=1)\n",
    "\n",
    "# 将子矩阵展开成 15 个 (1, 40000) 的矩阵\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# 合并展开后的矩阵为一个形状为 (15, 40000) 的矩阵\n",
    "test_b = np.concatenate(flat_matrices)\n",
    "\n",
    "print(test_b.shape)  # 输出 (15, 40000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6eb23a96-e9aa-449a-bdd3-e70089936497",
   "metadata": {},
   "outputs": [],
   "source": [
    "res_clean  = np.concatenate((test_c,  np.ones((len(test_c[: ,-1]), 1))), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c86267f8-14ac-4d5d-9145-d3be21b7b068",
   "metadata": {},
   "outputs": [],
   "source": [
    "res_backdoor  = np.concatenate((test_b,  np.zeros((len(test_b[: ,-1]), 1))), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fb5426f2-152f-403b-9ac0-c863901290ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((24, 40001), (24, 40001))"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res_clean.shape, res_backdoor.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5adce6ab-0aef-4470-9196-2567260cf43b",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_results = np.vstack((res_clean, res_backdoor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "06248e65-78d2-42b3-b3d1-9fc32b181fb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(48, 40001)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_results.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "21572eaa-0935-42ee-89fd-0ccbb8c12e08",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "41df8cd2-f5af-40af-9f7a-71ca438a42ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_label = model_results[:, -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c3de4ead-2916-4075-8782-03a9f8105ab2",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = model_results[:, :-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "c02cd648-fb3d-41df-81f5-0572215345a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "forest = RandomForestClassifier(n_estimators = 100000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a6c15ef2-632d-4abe-ad70-e06dc90ee9ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "forest_fit = forest.fit(model,model_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "d6bc881f-8472-4b4c-9b84-e33d3731f1a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "b = pd.read_csv('./final-csv/b_train_test.csv',header=None)\n",
    "c = pd.read_csv('./final-csv/c_train_test.csv',header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "4d798e6b-6c90-4f98-95ee-c6ca058ed8b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "c = c.transpose() \n",
    "b = b.transpose() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((11, 79800), (11, 79800))"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c.shape, b.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "fb77c8e0-5b06-4a8e-9032-4d9795c90a7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "c = c.values \n",
    "b = b.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "1a22058d-003f-49a1-a7fd-fcf4611e89c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_backdoor = b[:-1, :]\n",
    "test_clean = c[:-1, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "922f0533-0ac9-4f00-a804-4b03a6b9499b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11, 79800)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "d3c84e0a-13dd-428e-a9e6-040bc471d260",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 79800)\n"
     ]
    }
   ],
   "source": [
    "sub_matrices = np.split(test_clean, 10, axis=1)\n",
    "\n",
    "# 将子矩阵展开成 15 个 (1, 40000) 的矩阵\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# 合并展开后的矩阵为一个形状为 (15, 40000) 的矩阵\n",
    "c = np.concatenate(flat_matrices)\n",
    "\n",
    "print(c.shape)  # 输出 (15, 40000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "edfa798c-a444-4f67-a9a9-89b68d7bcd86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 79800)\n"
     ]
    }
   ],
   "source": [
    "sub_matrices = np.split(test_backdoor, 10, axis=1)\n",
    "\n",
    "# 将子矩阵展开成 15 个 (1, 40000) 的矩阵\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# 合并展开后的矩阵为一个形状为 (15, 40000) 的矩阵\n",
    "b = np.concatenate(flat_matrices)\n",
    "\n",
    "print(b.shape)  # 输出 (15, 40000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "980df9a8-f159-46cc-8abe-a5ccfb9c7aeb",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "X has 79800 features, but RandomForestClassifier is expecting 40000 features as input.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m/home/iis519409/github/test/Reprogramming/stl10.ipynb Cell 29\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B203.145.216.211/home/iis519409/github/test/Reprogramming/stl10.ipynb#X40sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m mem_res_clean \u001b[39m=\u001b[39m forest\u001b[39m.\u001b[39;49mpredict(c)\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B203.145.216.211/home/iis519409/github/test/Reprogramming/stl10.ipynb#X40sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m mem_res_backdoor \u001b[39m=\u001b[39m forest\u001b[39m.\u001b[39mpredict(b)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py:821\u001b[0m, in \u001b[0;36mForestClassifier.predict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    800\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpredict\u001b[39m(\u001b[39mself\u001b[39m, X):\n\u001b[1;32m    801\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    802\u001b[0m \u001b[39m    Predict class for X.\u001b[39;00m\n\u001b[1;32m    803\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    819\u001b[0m \u001b[39m        The predicted classes.\u001b[39;00m\n\u001b[1;32m    820\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 821\u001b[0m     proba \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpredict_proba(X)\n\u001b[1;32m    823\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_outputs_ \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m    824\u001b[0m         \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclasses_\u001b[39m.\u001b[39mtake(np\u001b[39m.\u001b[39margmax(proba, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m), axis\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py:863\u001b[0m, in \u001b[0;36mForestClassifier.predict_proba\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    861\u001b[0m check_is_fitted(\u001b[39mself\u001b[39m)\n\u001b[1;32m    862\u001b[0m \u001b[39m# Check data\u001b[39;00m\n\u001b[0;32m--> 863\u001b[0m X \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_validate_X_predict(X)\n\u001b[1;32m    865\u001b[0m \u001b[39m# Assign chunk of trees to jobs\u001b[39;00m\n\u001b[1;32m    866\u001b[0m n_jobs, _, _ \u001b[39m=\u001b[39m _partition_estimators(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_estimators, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_jobs)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py:603\u001b[0m, in \u001b[0;36mBaseForest._validate_X_predict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    600\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    601\u001b[0m \u001b[39mValidate X whenever one tries to predict, apply, predict_proba.\"\"\"\u001b[39;00m\n\u001b[1;32m    602\u001b[0m check_is_fitted(\u001b[39mself\u001b[39m)\n\u001b[0;32m--> 603\u001b[0m X \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_validate_data(X, dtype\u001b[39m=\u001b[39;49mDTYPE, accept_sparse\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mcsr\u001b[39;49m\u001b[39m\"\u001b[39;49m, reset\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[1;32m    604\u001b[0m \u001b[39mif\u001b[39;00m issparse(X) \u001b[39mand\u001b[39;00m (X\u001b[39m.\u001b[39mindices\u001b[39m.\u001b[39mdtype \u001b[39m!=\u001b[39m np\u001b[39m.\u001b[39mintc \u001b[39mor\u001b[39;00m X\u001b[39m.\u001b[39mindptr\u001b[39m.\u001b[39mdtype \u001b[39m!=\u001b[39m np\u001b[39m.\u001b[39mintc):\n\u001b[1;32m    605\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mNo support for np.int64 index based sparse matrices\u001b[39m\u001b[39m\"\u001b[39m)\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/base.py:558\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[0;34m(self, X, y, reset, validate_separately, **check_params)\u001b[0m\n\u001b[1;32m    555\u001b[0m     out \u001b[39m=\u001b[39m X, y\n\u001b[1;32m    557\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m no_val_X \u001b[39mand\u001b[39;00m check_params\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mensure_2d\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m--> 558\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_check_n_features(X, reset\u001b[39m=\u001b[39;49mreset)\n\u001b[1;32m    560\u001b[0m \u001b[39mreturn\u001b[39;00m out\n",
      "File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/base.py:359\u001b[0m, in \u001b[0;36mBaseEstimator._check_n_features\u001b[0;34m(self, X, reset)\u001b[0m\n\u001b[1;32m    356\u001b[0m     \u001b[39mreturn\u001b[39;00m\n\u001b[1;32m    358\u001b[0m \u001b[39mif\u001b[39;00m n_features \u001b[39m!=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_features_in_:\n\u001b[0;32m--> 359\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m    360\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mX has \u001b[39m\u001b[39m{\u001b[39;00mn_features\u001b[39m}\u001b[39;00m\u001b[39m features, but \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    361\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mis expecting \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_features_in_\u001b[39m}\u001b[39;00m\u001b[39m features as input.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    362\u001b[0m     )\n",
      "\u001b[0;31mValueError\u001b[0m: X has 79800 features, but RandomForestClassifier is expecting 40000 features as input."
     ]
    }
   ],
   "source": [
    "mem_res_clean = forest.predict(c)\n",
    "mem_res_backdoor = forest.predict(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0802592d-0130-408a-b6c9-b91805d11291",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b7493d8-3d7a-4c26-bbe0-7720f7343b76",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = np.hstack((mem_res_clean, mem_res_backdoor))\n",
    "y_true = np.concatenate((np.ones_like(mem_res_clean), np.zeros_like(mem_res_backdoor)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3af6efab-5a6b-4a50-9e7b-c75ef562287e",
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fe236dd-46cc-4979-b5cf-4ffbea7755f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "precision_score(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545cd8bb-9790-4212-9a47-1606c78f2ee6",
   "metadata": {},
   "outputs": [],
   "source": [
    "f1_score(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e45c71ea-2ad8-4416-88b7-bf9c6231f89b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5473e9ae-0446-498c-8c08-7bb81af31cf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ddf70a2-6760-4e85-9660-7611c3bd3d58",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
