{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63d47126",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:21.433424Z",
     "start_time": "2024-11-19T04:46:21.430473Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "690d7578",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:23.902058Z",
     "start_time": "2024-11-19T04:46:21.435648Z"
    }
   },
   "outputs": [],
   "source": [
    "from Linear_Separability_numpy import *\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "import numpy as np\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e00f39f3",
   "metadata": {},
   "source": [
    "# Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07734a47",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.379028Z",
     "start_time": "2024-11-19T04:46:23.904958Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from ucimlrepo import fetch_ucirepo\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import pandas as pd \n",
    "\n",
    "bank_marketing = fetch_ucirepo(id=222)\n",
    "\n",
    "X = bank_marketing.data.features\n",
    "y = bank_marketing.data.targets['y']\n",
    "\n",
    "y = y.map({'yes': 1, 'no': 0})\n",
    "\n",
    "num_positive = np.sum(y == 1)\n",
    "num_negative = np.sum(y == 0)\n",
    "print(\"Number of positive samples (Malignant):\", num_positive)\n",
    "print(\"Number of negative samples (Benign):\", num_negative)\n",
    "\n",
    "X = pd.get_dummies(X, drop_first=True)\n",
    "\n",
    "scaler = StandardScaler()\n",
    "all_features = scaler.fit_transform(X)\n",
    "\n",
    "all_labels = y.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ea656b8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.414623Z",
     "start_time": "2024-11-19T04:46:27.384435Z"
    }
   },
   "outputs": [],
   "source": [
    "train_features, test_features, train_labels, test_labels = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)\n",
    "\n",
    "def random_undersample(X, y):\n",
    "\n",
    "    unique, counts = np.unique(y, return_counts=True)\n",
    "    min_count = np.min(counts)\n",
    "\n",
    "    X_resampled, y_resampled = [], []\n",
    "    for label in unique:\n",
    "        X_class = X[y == label]\n",
    "        y_class = y[y == label]\n",
    "\n",
    "        indices = np.random.choice(len(X_class), min_count, replace=False)\n",
    "        X_resampled.append(X_class[indices])\n",
    "        y_resampled.append(y_class[indices])\n",
    "\n",
    "    X_resampled = np.vstack(X_resampled)\n",
    "    y_resampled = np.hstack(y_resampled)\n",
    "\n",
    "    return X_resampled, y_resampled\n",
    "\n",
    "train_features, train_labels = random_undersample(train_features, train_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "984c77a6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.421722Z",
     "start_time": "2024-11-19T04:46:27.417144Z"
    }
   },
   "outputs": [],
   "source": [
    "subset_size = 1000\n",
    "\n",
    "indices = np.random.choice(len(train_features), size=subset_size, replace=False)\n",
    "\n",
    "subset_features = train_features[indices]\n",
    "subset_labels = train_labels[indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01b3580c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.430433Z",
     "start_time": "2024-11-19T04:46:27.424365Z"
    }
   },
   "outputs": [],
   "source": [
    "train_features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "148db3bf",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.437879Z",
     "start_time": "2024-11-19T04:46:27.432639Z"
    }
   },
   "outputs": [],
   "source": [
    "unique, counts = np.unique(train_labels, return_counts=True)\n",
    "positive_count = counts[unique == 1][0] if 1 in unique else 0\n",
    "negative_count = counts[unique == 0][0] if 0 in unique else 0\n",
    "print(f\"Training labels - Positive count: {positive_count}, Negative count: {negative_count}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a83e84bb",
   "metadata": {},
   "source": [
    "# Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b604340",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:27.442320Z",
     "start_time": "2024-11-19T04:46:27.439821Z"
    }
   },
   "outputs": [],
   "source": [
    "# parameters\n",
    "batch_size = 32"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5e11e4f",
   "metadata": {},
   "source": [
    "# network structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b70a3124",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:30.325893Z",
     "start_time": "2024-11-19T04:46:27.451281Z"
    }
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels)).batch(batch_size).shuffle(buffer_size=1000)\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels)).batch(batch_size)\n",
    "\n",
    "model = tf.keras.models.Sequential([\n",
    "    tf.keras.layers.InputLayer(input_shape=(all_features.shape[1],)),  \n",
    "    tf.keras.layers.Dense(100, activation='relu'),\n",
    "    tf.keras.layers.Dense(100, activation='relu'),\n",
    "    tf.keras.layers.Dense(100, activation='relu'),\n",
    "    tf.keras.layers.Dense(100, activation='relu'),\n",
    "    tf.keras.layers.Dense(100, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')  \n",
    "])\n",
    "\n",
    "model.compile(loss='binary_crossentropy',\n",
    "              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "layer_outputs = [\n",
    "    layer.output for layer in model.layers if ('dropout' not in layer.name)\n",
    "]\n",
    "activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer_outputs)\n",
    "\n",
    "activations = activation_model.predict(test_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da1212c2",
   "metadata": {},
   "source": [
    "# initial record matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a14e153",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:46:30.478605Z",
     "start_time": "2024-11-19T04:46:30.330735Z"
    }
   },
   "outputs": [],
   "source": [
    "num_epochs = 100\n",
    "batch_ep = 1\n",
    "x_plot = np.arange(num_epochs)*batch_ep\n",
    "reserved_layers = 0\n",
    "\n",
    "for Separability_image,Separability_label in train_dataset:\n",
    "    break\n",
    "Separability_label = tf.reshape(Separability_label, (-1, 1))\n",
    "\n",
    "x=activation_model.predict(Separability_image)\n",
    "\n",
    "LS_star_squence = np.zeros((len(x)-reserved_layers,num_epochs))\n",
    "LS_0_squence = np.zeros((len(x)-reserved_layers,num_epochs))\n",
    "LS_1_squence = np.zeros((len(x)-reserved_layers,num_epochs))\n",
    "LS_2_squence = np.zeros((len(x)-reserved_layers,num_epochs))\n",
    "LDA_squence = np.zeros((len(x)-reserved_layers,num_epochs))\n",
    "\n",
    "LS_star_squence_base = np.zeros((1,num_epochs))\n",
    "LS_0_squence_base = np.zeros((1,num_epochs))\n",
    "LS_1_squence_base = np.zeros((1,num_epochs))\n",
    "LS_2_squence_base = np.zeros((1,num_epochs))\n",
    "LDA_squence_base = np.zeros((1,num_epochs))\n",
    "\n",
    "train_loss_squence = np.zeros((num_epochs,))\n",
    "train_accuracy_squence = np.zeros((num_epochs,))\n",
    "test_loss_squence = np.zeros((num_epochs,))\n",
    "test_accuracy_squence = np.zeros((num_epochs,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5768ac45",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "c43169ac",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c447265f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:27.194175Z",
     "start_time": "2024-11-19T04:46:30.481621Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "LS_star_squence_base[0,:],LS_0_squence_base[0,:],\\\n",
    "LS_1_squence_base[0,:],LS_2_squence_base[0,:],\\\n",
    "LDA_squence_base[0,:]=W(tf.constant(subset_features),subset_labels.reshape(-1,1))\n",
    "\n",
    "\n",
    "for iter_e in range(num_epochs):\n",
    "\n",
    "    model.fit(train_dataset, epochs=1, validation_data=test_dataset)\n",
    "    \n",
    "    x = activation_model(subset_features)\n",
    "\n",
    "    train_loss_squence[iter_e],train_accuracy_squence[iter_e] = model.evaluate(train_dataset)\n",
    "    test_loss_squence[iter_e],test_accuracy_squence[iter_e] = model.evaluate(test_dataset)\n",
    "\n",
    "    for layers_i in range(len(x)-reserved_layers):\n",
    "        LS_star_squence[layers_i,iter_e],LS_0_squence[layers_i,iter_e],LS_1_squence[layers_i,iter_e],\\\n",
    "        LS_2_squence[layers_i,iter_e],LDA_squence[layers_i,iter_e]=W(x[layers_i+reserved_layers],subset_labels.reshape(-1,1))\n",
    "    \n",
    "    print('**********'+'the',iter_e,'epochs has finished'+'**********')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3d147e4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:27.201986Z",
     "start_time": "2024-11-19T04:55:27.197639Z"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "import time\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51a8fcc4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:27.209841Z",
     "start_time": "2024-11-19T04:55:27.204466Z"
    }
   },
   "outputs": [],
   "source": [
    "layer_name_list = get_layer_name(model)[reserved_layers:]\n",
    "layer_name_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb0d2b4c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:28.643042Z",
     "start_time": "2024-11-19T04:55:27.212365Z"
    }
   },
   "outputs": [],
   "source": [
    "layer_name_list = ['1st_dense','2nd_dense','3rd_dense','4th_dense','5th_dense','6th_dense']\n",
    "Separability_figure = plot_Separability_figure(layer_name_list,x_plot,LS_star_squence,LS_0_squence,LS_1_squence,LS_2_squence,LDA_squence,LS_star_squence_base,LS_0_squence_base,LS_1_squence_base,LS_2_squence_base,LDA_squence_base)\n",
    "net_figure = plot_net_figure(layer_name_list,x_plot,train_loss_squence,train_accuracy_squence,test_loss_squence,test_accuracy_squence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1366544",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:28.650054Z",
     "start_time": "2024-11-19T04:55:28.645641Z"
    }
   },
   "outputs": [],
   "source": [
    "info={'layer_name_list':layer_name_list,'x_plot':x_plot,'LS_star_squence':LS_star_squence,'LS_0_squence':LS_0_squence,\n",
    "'LS_1_squence':LS_1_squence,'LS_2_squence':LS_2_squence,'LDA_squence':LDA_squence,\n",
    "'train_loss_squence':train_loss_squence,'train_accuracy_squence':train_accuracy_squence,\n",
    "'test_loss_squence':test_loss_squence,'test_accuracy_squence':test_accuracy_squence,\n",
    "'LS_star_squence_base':LS_star_squence_base,'LS_0_squence_base':LS_0_squence_base,'LS_1_squence_base':LS_1_squence_base,\n",
    " 'LS_2_squence_base':LS_2_squence_base, 'LDA_squence_base':LDA_squence_base}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36437a3f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T04:55:28.656951Z",
     "start_time": "2024-11-19T04:55:28.652399Z"
    }
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "def save_info(info, file_path):\n",
    "    with open(file_path, 'wb') as file:\n",
    "        pickle.dump(info, file)\n",
    "\n",
    "save_info(info, '../saved_data/Marketing.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75e65634",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545a0ead",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95d37add",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow-lsm",
   "language": "python",
   "name": "tensorflow-lsm"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
