{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1208,
   "id": "7c6de070-5888-4c35-9110-356afd351d4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List, Tuple, Dict\n",
    "from copy import copy\n",
    "\n",
    "from tqdm import tqdm_notebook\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1209,
   "id": "efeeac40-cf61-4291-8351-ac06adac5ef8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Header-less training CSVs for the backdoor (b) and clean (c) sets.\n",
    "# NOTE: despite the `_test` suffix, these frames hold the training files.\n",
    "c_test = pd.read_csv('c_train.csv', header=None)\n",
    "b_test = pd.read_csv('b_train.csv', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1210,
   "id": "6124ffc4-fb18-4410-805a-7f42b7b4b40e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Swap axes so that each CSV column becomes a row.\n",
    "# NOTE: not idempotent - running this cell twice transposes back.\n",
    "b_test = b_test.T\n",
    "c_test = c_test.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1211,
   "id": "80e9e640-cd72-4899-a66c-2d7207781ee0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the pandas index/columns and continue with the raw NumPy arrays.\n",
    "b_test = b_test.to_numpy()\n",
    "c_test = c_test.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1212,
   "id": "4ebad5b4-a6c6-4b57-8be8-2a4522471518",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11, 239400)"
      ]
     },
     "execution_count": 1212,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1213,
   "id": "daee0e57-a184-4d4e-9e1d-db3b7eee139c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep all rows but the last (11 -> 10 rows); columns are untouched.\n",
    "# NOTE(review): what the dropped final row holds is not visible here - confirm.\n",
    "test_clean = c_test[:-1]\n",
    "test_backdoor = b_test[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1214,
   "id": "402168c0-f4f2-4d4c-86ae-4b616fb84cab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10, 239400)"
      ]
     },
     "execution_count": 1214,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_backdoor.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1215,
   "id": "cbfdf24c-c7d5-4bdf-a804-8016a43e8245",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(30, 79800)\n"
     ]
    }
   ],
   "source": [
    "# Cut the columns into 30 equal-width blocks; np.split raises if the width is not divisible by 30.\n",
    "sub_matrices = np.split(test_clean, 30, axis=1)\n",
    "\n",
    "# Flatten every block (row-major) into a single-row matrix.\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# Stack the single-row matrices: one row per block.\n",
    "test_c = np.concatenate(flat_matrices)\n",
    "\n",
    "# Every element of the input must land in exactly one of the 30 rows.\n",
    "assert test_c.shape == (30, test_clean.size // 30)\n",
    "\n",
    "print(test_c.shape)  # (30, 79800) for the (10, 239400) input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1216,
   "id": "37bcd0f7-c976-4264-9bd7-a7667524e43b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(30, 79800)\n"
     ]
    }
   ],
   "source": [
    "# Cut the columns into 30 equal-width blocks; np.split raises if the width is not divisible by 30.\n",
    "sub_matrices = np.split(test_backdoor, 30, axis=1)\n",
    "\n",
    "# Flatten every block (row-major) into a single-row matrix.\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# Stack the single-row matrices: one row per block.\n",
    "test_b = np.concatenate(flat_matrices)\n",
    "\n",
    "# Every element of the input must land in exactly one of the 30 rows.\n",
    "assert test_b.shape == (30, test_backdoor.size // 30)\n",
    "\n",
    "print(test_b.shape)  # (30, 79800) for the (10, 239400) input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1217,
   "id": "6eb23a96-e9aa-449a-bdd3-e70089936497",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the class label as a trailing column: clean rows are labelled 1.\n",
    "# The row count comes straight from the shape instead of slicing out a column first.\n",
    "res_clean = np.hstack((test_c, np.ones((test_c.shape[0], 1))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1218,
   "id": "c86267f8-14ac-4d5d-9145-d3be21b7b068",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the class label as a trailing column: backdoor rows are labelled 0.\n",
    "# The row count comes straight from the shape instead of slicing out a column first.\n",
    "res_backdoor = np.hstack((test_b, np.zeros((test_b.shape[0], 1))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1219,
   "id": "fb5426f2-152f-403b-9ac0-c863901290ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30, 79801)"
      ]
     },
     "execution_count": 1219,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res_backdoor.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1220,
   "id": "5adce6ab-0aef-4470-9196-2567260cf43b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack row-wise: clean samples first, then backdoor samples (the label stays in the last column).\n",
    "model_results = np.concatenate((res_clean, res_backdoor), axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1221,
   "id": "06248e65-78d2-42b3-b3d1-9fc32b181fb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(60, 79801)"
      ]
     },
     "execution_count": 1221,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_results.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1222,
   "id": "21572eaa-0935-42ee-89fd-0ccbb8c12e08",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1223,
   "id": "41df8cd2-f5af-40af-9f7a-71ca438a42ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target vector: the trailing label column (1 = clean, 0 = backdoor).\n",
    "model_label = model_results[..., -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1224,
   "id": "c3de4ead-2916-4075-8782-03a9f8105ab2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature matrix: every column except the trailing label column.\n",
    "# NOTE: `model` is the training input here, not an estimator.\n",
    "model = model_results[..., :-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1225,
   "id": "c02cd648-fb3d-41df-81f5-0572215345a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed the forest so its bootstrap samples and feature draws - and hence the reported scores - are repeatable.\n",
    "# n_jobs=-1 only spreads the 100000 trees over all available cores; it does not alter the fitted model.\n",
    "forest = RandomForestClassifier(n_estimators=100000, random_state=0, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1226,
   "id": "a6c15ef2-632d-4abe-ad70-e06dc90ee9ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train on the stacked clean + backdoor rows.\n",
    "# fit() hands back the estimator itself, so `forest_fit` is just another name for `forest`.\n",
    "forest_fit = forest.fit(X=model, y=model_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1227,
   "id": "d6bc881f-8472-4b4c-9b84-e33d3731f1a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Header-less CSVs that the fitted forest is scored on: backdoor (b) and clean (c).\n",
    "c = pd.read_csv('c_train_test.csv', header=None)\n",
    "b = pd.read_csv('b_train_test.csv', header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1228,
   "id": "4d798e6b-6c90-4f98-95ee-c6ca058ed8b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Swap axes so that each CSV column becomes a row.\n",
    "# NOTE: not idempotent - running this cell twice transposes back.\n",
    "b = b.T\n",
    "c = c.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1229,
   "id": "fb77c8e0-5b06-4a8e-9032-4d9795c90a7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the pandas index/columns and continue with the raw NumPy arrays.\n",
    "b = b.to_numpy()\n",
    "c = c.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1230,
   "id": "1a22058d-003f-49a1-a7fd-fcf4611e89c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep all rows but the last; columns are untouched.\n",
    "# NOTE: this rebinds `test_clean` / `test_backdoor`, which earlier held the training slices.\n",
    "test_clean = c[:-1]\n",
    "test_backdoor = b[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1231,
   "id": "922f0533-0ac9-4f00-a804-4b03a6b9499b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11, 79800)"
      ]
     },
     "execution_count": 1231,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1232,
   "id": "d3c84e0a-13dd-428e-a9e6-040bc471d260",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 79800)\n"
     ]
    }
   ],
   "source": [
    "# Cut the columns into 10 equal-width blocks; np.split raises if the width is not divisible by 10.\n",
    "sub_matrices = np.split(test_clean, 10, axis=1)\n",
    "\n",
    "# Flatten every block (row-major) into a single-row matrix.\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# Stack the single-row matrices: one row per block.\n",
    "# NOTE: this rebinds `c`, which until now held the raw transposed matrix from c_train_test.csv.\n",
    "c = np.concatenate(flat_matrices)\n",
    "\n",
    "# Every element of the input must land in exactly one of the 10 rows.\n",
    "assert c.shape == (10, test_clean.size // 10)\n",
    "\n",
    "print(c.shape)  # (10, 79800) for the (10, 79800) input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1233,
   "id": "edfa798c-a444-4f67-a9a9-89b68d7bcd86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 79800)\n"
     ]
    }
   ],
   "source": [
    "# Cut the columns into 10 equal-width blocks; np.split raises if the width is not divisible by 10.\n",
    "sub_matrices = np.split(test_backdoor, 10, axis=1)\n",
    "\n",
    "# Flatten every block (row-major) into a single-row matrix.\n",
    "flat_matrices = [sub_matrix.reshape(1, -1) for sub_matrix in sub_matrices]\n",
    "\n",
    "# Stack the single-row matrices: one row per block.\n",
    "# NOTE: this rebinds `b`, which until now held the raw transposed matrix from b_train_test.csv.\n",
    "b = np.concatenate(flat_matrices)\n",
    "\n",
    "# Every element of the input must land in exactly one of the 10 rows.\n",
    "assert b.shape == (10, test_backdoor.size // 10)\n",
    "\n",
    "print(b.shape)  # (10, 79800) for the (10, 79800) input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1234,
   "id": "980df9a8-f159-46cc-8abe-a5ccfb9c7aeb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predicted labels for the reshaped backdoor (b) and clean (c) matrices.\n",
    "mem_res_backdoor = forest.predict(X=b)\n",
    "mem_res_clean = forest.predict(X=c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1235,
   "id": "0802592d-0130-408a-b6c9-b91805d11291",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1236,
   "id": "7b7493d8-3d7a-4c26-bbe0-7720f7343b76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ground truth follows the training convention: clean -> 1, backdoor -> 0.\n",
    "y_true = np.concatenate((np.ones_like(mem_res_clean), np.zeros_like(mem_res_backdoor)))\n",
    "# Predictions in the same order: clean samples first, then backdoor samples.\n",
    "y_pred = np.concatenate((mem_res_clean, mem_res_backdoor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1237,
   "id": "3af6efab-5a6b-4a50-9e7b-c75ef562287e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9"
      ]
     },
     "execution_count": 1237,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of samples (clean and backdoor together) that were labelled correctly.\n",
    "accuracy_score(y_true=y_true, y_pred=y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1238,
   "id": "6fe236dd-46cc-4979-b5cf-4ffbea7755f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8333333333333334"
      ]
     },
     "execution_count": 1238,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Precision for the default positive label 1, which is the clean class in this notebook.\n",
    "precision_score(y_true=y_true, y_pred=y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1239,
   "id": "545cd8bb-9790-4212-9a47-1606c78f2ee6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9090909090909091"
      ]
     },
     "execution_count": 1239,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# F1 for the default positive label 1, which is the clean class in this notebook.\n",
    "f1_score(y_true=y_true, y_pred=y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1240,
   "id": "e45c71ea-2ad8-4416-88b7-bf9c6231f89b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,\n",
       "       0., 0., 0.])"
      ]
     },
     "execution_count": 1240,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1241,
   "id": "5473e9ae-0446-498c-8c08-7bb81af31cf3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0.,\n",
       "       0., 0., 0.])"
      ]
     },
     "execution_count": 1241,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ddf70a2-6760-4e85-9660-7611c3bd3d58",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:.conda-TomTestEnv] *",
   "language": "python",
   "name": "conda-env-.conda-TomTestEnv-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
