{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63d47126",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:00.147540Z",
     "start_time": "2024-11-19T08:53:00.144073Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "# Configure TensorFlow's C++ log level before the next cell imports tensorflow.\n",
    "# NOTE(review): '2' presumably hides INFO and WARNING messages - confirm against the TF docs.\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "690d7578",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:02.605928Z",
     "start_time": "2024-11-19T08:53:00.149989Z"
    }
   },
   "outputs": [],
   "source": [
    "from Linear_Separability_numpy import *\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "import numpy as np\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e00f39f3",
   "metadata": {},
   "source": [
    "# Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39b7973a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:38.391202Z",
     "start_time": "2024-11-19T08:53:02.608804Z"
    }
   },
   "outputs": [],
   "source": [
    "from ucimlrepo import fetch_ucirepo\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Fetch the breast cancer dataset (UCI repository id=17): a feature table plus a 'Diagnosis' target.\n",
    "breast_cancer_data = fetch_ucirepo(id=17)\n",
    "\n",
    "X = breast_cancer_data.data.features\n",
    "# Encode the diagnosis as a binary label: 'M' (malignant) -> 1, 'B' (benign) -> 0.\n",
    "y = breast_cancer_data.data.targets['Diagnosis'].map({'M': 1, 'B': 0})\n",
    "\n",
    "# Series.map turns any value outside the mapping into NaN; stop here rather than train on NaN labels.\n",
    "assert y.notna().all(), 'Unexpected value in the Diagnosis column'\n",
    "\n",
    "num_positive = np.sum(y == 1)\n",
    "num_negative = np.sum(y == 0)\n",
    "print(\"Number of positive samples (Malignant):\", num_positive)\n",
    "print(\"Number of negative samples (Benign):\", num_negative)\n",
    "\n",
    "# Standardize the features.\n",
    "# NOTE(review): the scaler is fit on the full dataset before the train/test split in the\n",
    "# next cell, so test-set statistics leak into preprocessing - consider fitting on the\n",
    "# training split only.\n",
    "scaler = StandardScaler()\n",
    "all_features = scaler.fit_transform(X)\n",
    "\n",
    "all_labels = y.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ea656b8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:38.398856Z",
     "start_time": "2024-11-19T08:53:38.393878Z"
    }
   },
   "outputs": [],
   "source": [
    "# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.\n",
    "train_features, test_features, train_labels, test_labels = train_test_split(\n",
    "    all_features,\n",
    "    all_labels,\n",
    "    test_size=0.2,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01b3580c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:38.406238Z",
     "start_time": "2024-11-19T08:53:38.400725Z"
    }
   },
   "outputs": [],
   "source": [
    "# Inspect the dimensions of the training matrix; the second dimension of the feature\n",
    "# matrix is what the model's input layer is sized from.\n",
    "train_features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "148db3bf",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:38.413269Z",
     "start_time": "2024-11-19T08:53:38.408340Z"
    }
   },
   "outputs": [],
   "source": [
    "# Class balance of the training split: look each label up in a value -> count table,\n",
    "# falling back to 0 when a class is absent.\n",
    "unique, counts = np.unique(train_labels, return_counts=True)\n",
    "label_counts = dict(zip(unique, counts))\n",
    "positive_count = label_counts.get(1, 0)\n",
    "negative_count = label_counts.get(0, 0)\n",
    "print(f\"Training labels - Positive count: {positive_count}, Negative count: {negative_count}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a83e84bb",
   "metadata": {},
   "source": [
    "# Training configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b604340",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:38.418238Z",
     "start_time": "2024-11-19T08:53:38.415453Z"
    }
   },
   "outputs": [],
   "source": [
    "# Mini-batch size applied to both the training and the test tf.data pipelines.\n",
    "batch_size = 32"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5e11e4f",
   "metadata": {},
   "source": [
    "# Network structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b70a3124",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:40.369276Z",
     "start_time": "2024-11-19T08:53:38.427640Z"
    }
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "# Input pipelines. Shuffle individual samples *before* batching so that every epoch sees\n",
    "# differently composed mini-batches; shuffling after .batch() only permutes whole batches.\n",
    "# A buffer as large as the training set shuffles over all samples.\n",
    "train_dataset = (\n",
    "    tf.data.Dataset.from_tensor_slices((train_features, train_labels))\n",
    "    .shuffle(buffer_size=len(train_features))\n",
    "    .batch(batch_size)\n",
    ")\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels)).batch(batch_size)\n",
    "\n",
    "# Fully connected binary classifier: five 64-unit ReLU layers followed by one sigmoid unit.\n",
    "model = tf.keras.models.Sequential([\n",
    "    tf.keras.layers.InputLayer(input_shape=(all_features.shape[1],)),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])\n",
    "\n",
    "model.compile(loss='binary_crossentropy',\n",
    "              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "# Auxiliary model that returns the output of every layer whose name does not contain\n",
    "# 'dropout', so the per-layer activations can be analysed during training.\n",
    "layer_outputs = [\n",
    "    layer.output for layer in model.layers if ('dropout' not in layer.name)\n",
    "]\n",
    "activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer_outputs)\n",
    "\n",
    "activations = activation_model.predict(test_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da1212c2",
   "metadata": {},
   "source": [
    "# Initialize record matrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a14e153",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:53:40.492028Z",
     "start_time": "2024-11-19T08:53:40.371592Z"
    }
   },
   "outputs": [],
   "source": [
    "num_epochs = 100\n",
    "batch_ep = 1\n",
    "reserved_layers = 0\n",
    "\n",
    "# Epoch axis handed to the plotting helpers.\n",
    "x_plot = batch_ep * np.arange(num_epochs)\n",
    "\n",
    "# Pull a single batch from the training pipeline and push it through the activation\n",
    "# model; len(x) (one entry per recorded layer) sizes the record matrices below.\n",
    "for Separability_image, Separability_label in train_dataset.take(1):\n",
    "    pass\n",
    "Separability_label = tf.reshape(Separability_label, (-1, 1))\n",
    "\n",
    "x = activation_model.predict(Separability_image)\n",
    "\n",
    "# Per-layer measures: one row per recorded layer, one column per epoch.\n",
    "record_shape = (len(x) - reserved_layers, num_epochs)\n",
    "LS_star_squence = np.zeros(record_shape)\n",
    "LS_0_squence = np.zeros(record_shape)\n",
    "LS_1_squence = np.zeros(record_shape)\n",
    "LS_2_squence = np.zeros(record_shape)\n",
    "LDA_squence = np.zeros(record_shape)\n",
    "\n",
    "# Baseline measures: a single row spanning the epoch axis.\n",
    "base_shape = (1, num_epochs)\n",
    "LS_star_squence_base = np.zeros(base_shape)\n",
    "LS_0_squence_base = np.zeros(base_shape)\n",
    "LS_1_squence_base = np.zeros(base_shape)\n",
    "LS_2_squence_base = np.zeros(base_shape)\n",
    "LDA_squence_base = np.zeros(base_shape)\n",
    "\n",
    "# Loss / accuracy curves: one value per epoch.\n",
    "curve_shape = (num_epochs,)\n",
    "train_loss_squence = np.zeros(curve_shape)\n",
    "train_accuracy_squence = np.zeros(curve_shape)\n",
    "test_loss_squence = np.zeros(curve_shape)\n",
    "test_accuracy_squence = np.zeros(curve_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5768ac45",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "c43169ac",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c447265f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:03.034904Z",
     "start_time": "2024-11-19T08:53:40.496264Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Baseline: the five measures returned by W for the raw training features, each\n",
    "# broadcast across the whole epoch axis of its *_base row.\n",
    "# NOTE(review): W is not defined in this notebook; presumably it comes from the\n",
    "# `Linear_Separability_numpy` star import - confirm.\n",
    "(\n",
    "    LS_star_squence_base[0, :],\n",
    "    LS_0_squence_base[0, :],\n",
    "    LS_1_squence_base[0, :],\n",
    "    LS_2_squence_base[0, :],\n",
    "    LDA_squence_base[0, :],\n",
    ") = W(tf.constant(train_features), train_labels.reshape(-1, 1))\n",
    "\n",
    "for iter_e in range(num_epochs):\n",
    "    # Train for exactly one epoch per iteration so everything is recorded epoch by epoch.\n",
    "    model.fit(train_dataset, epochs=1, validation_data=test_dataset)\n",
    "\n",
    "    # Activations of every recorded layer for the full training set.\n",
    "    x = activation_model(train_features)\n",
    "\n",
    "    epoch_train_loss, epoch_train_accuracy = model.evaluate(train_dataset)\n",
    "    train_loss_squence[iter_e] = epoch_train_loss\n",
    "    train_accuracy_squence[iter_e] = epoch_train_accuracy\n",
    "\n",
    "    epoch_test_loss, epoch_test_accuracy = model.evaluate(test_dataset)\n",
    "    test_loss_squence[iter_e] = epoch_test_loss\n",
    "    test_accuracy_squence[iter_e] = epoch_test_accuracy\n",
    "\n",
    "    # Store the five measures of each recorded layer in column `iter_e`.\n",
    "    for layers_i in range(len(x) - reserved_layers):\n",
    "        (\n",
    "            LS_star_squence[layers_i, iter_e],\n",
    "            LS_0_squence[layers_i, iter_e],\n",
    "            LS_1_squence[layers_i, iter_e],\n",
    "            LS_2_squence[layers_i, iter_e],\n",
    "            LDA_squence[layers_i, iter_e],\n",
    "        ) = W(x[layers_i + reserved_layers], train_labels.reshape(-1, 1))\n",
    "\n",
    "    print('**********' + 'the', iter_e, 'epochs has finished' + '**********')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3d147e4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:03.046704Z",
     "start_time": "2024-11-19T08:55:03.040199Z"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "import time\n",
    "# Suppress all Python warnings raised by the cells that run after this one.\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51a8fcc4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:03.060348Z",
     "start_time": "2024-11-19T08:55:03.051074Z"
    }
   },
   "outputs": [],
   "source": [
    "# Layer names for the model, skipping the first `reserved_layers` entries, shown for inspection.\n",
    "# NOTE(review): get_layer_name is not defined in this notebook; presumably it comes from the\n",
    "# `Linear_Separability_numpy` star import - confirm. The next cell replaces this list\n",
    "# with hand-written labels.\n",
    "layer_name_list = get_layer_name(model)[reserved_layers:]\n",
    "layer_name_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb0d2b4c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:04.497481Z",
     "start_time": "2024-11-19T08:55:03.064461Z"
    }
   },
   "outputs": [],
   "source": [
    "# Hand-written display labels, one per Dense layer of the model.\n",
    "layer_name_list = [\n",
    "    '1st_dense', '2nd_dense', '3rd_dense',\n",
    "    '4th_dense', '5th_dense', '6th_dense',\n",
    "]\n",
    "\n",
    "# Per-layer measures over the epochs, together with their baseline rows.\n",
    "Separability_figure = plot_Separability_figure(\n",
    "    layer_name_list,\n",
    "    x_plot,\n",
    "    LS_star_squence,\n",
    "    LS_0_squence,\n",
    "    LS_1_squence,\n",
    "    LS_2_squence,\n",
    "    LDA_squence,\n",
    "    LS_star_squence_base,\n",
    "    LS_0_squence_base,\n",
    "    LS_1_squence_base,\n",
    "    LS_2_squence_base,\n",
    "    LDA_squence_base,\n",
    ")\n",
    "\n",
    "# Train / test loss and accuracy curves.\n",
    "net_figure = plot_net_figure(\n",
    "    layer_name_list,\n",
    "    x_plot,\n",
    "    train_loss_squence,\n",
    "    train_accuracy_squence,\n",
    "    test_loss_squence,\n",
    "    test_accuracy_squence,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1366544",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:04.506796Z",
     "start_time": "2024-11-19T08:55:04.501405Z"
    }
   },
   "outputs": [],
   "source": [
    "# Bundle everything needed to redraw the figures; each key is the name of the variable it stores.\n",
    "info = {\n",
    "    'layer_name_list': layer_name_list,\n",
    "    'x_plot': x_plot,\n",
    "    'LS_star_squence': LS_star_squence,\n",
    "    'LS_0_squence': LS_0_squence,\n",
    "    'LS_1_squence': LS_1_squence,\n",
    "    'LS_2_squence': LS_2_squence,\n",
    "    'LDA_squence': LDA_squence,\n",
    "    'train_loss_squence': train_loss_squence,\n",
    "    'train_accuracy_squence': train_accuracy_squence,\n",
    "    'test_loss_squence': test_loss_squence,\n",
    "    'test_accuracy_squence': test_accuracy_squence,\n",
    "    'LS_star_squence_base': LS_star_squence_base,\n",
    "    'LS_0_squence_base': LS_0_squence_base,\n",
    "    'LS_1_squence_base': LS_1_squence_base,\n",
    "    'LS_2_squence_base': LS_2_squence_base,\n",
    "    'LDA_squence_base': LDA_squence_base,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36437a3f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T08:55:04.514081Z",
     "start_time": "2024-11-19T08:55:04.509387Z"
    }
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "def save_info(info, file_path):\n",
    "    \"\"\"Pickle `info` to `file_path`, creating the parent directory when it is missing.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    info : object\n",
    "        Any picklable object (here, the dict of recorded curves).\n",
    "    file_path : str, bytes, path-like or int\n",
    "        Destination accepted by `open`; an existing file is overwritten.\n",
    "    \"\"\"\n",
    "    import os  # local import keeps this cell runnable on its own\n",
    "\n",
    "    # Only real paths have a parent directory; integer file descriptors are passed through.\n",
    "    if isinstance(file_path, (str, bytes, os.PathLike)):\n",
    "        parent_dir = os.path.dirname(file_path)\n",
    "        # dirname is empty for a bare filename, and makedirs('') raises, so guard it.\n",
    "        if parent_dir:\n",
    "            os.makedirs(parent_dir, exist_ok=True)\n",
    "\n",
    "    with open(file_path, 'wb') as file:\n",
    "        pickle.dump(info, file)\n",
    "\n",
    "save_info(info, '../saved_data/Diagnostic.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75e65634",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545a0ead",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95d37add",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow-lsm",
   "language": "python",
   "name": "tensorflow-lsm"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
