{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63d47126",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:36.486497Z",
     "start_time": "2024-11-19T03:48:36.483409Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "# Set TensorFlow's C++ log-level variable before tensorflow is imported in the\n",
    "# next cell (presumably to hide INFO/WARNING console output - confirm in the TF docs).\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "690d7578",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:39.039765Z",
     "start_time": "2024-11-19T03:48:36.488757Z"
    }
   },
   "outputs": [],
   "source": [
    "from Linear_Separability_numpy import *\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "import numpy as np\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e00f39f3",
   "metadata": {},
   "source": [
    "# Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39b7973a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:41.796913Z",
     "start_time": "2024-11-19T03:48:39.043181Z"
    }
   },
   "outputs": [],
   "source": [
    "from ucimlrepo import fetch_ucirepo\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Fetch dataset id=601 through ucimlrepo and pull out the three pieces used below:\n",
    "# the id columns, the feature table and the 'Machine failure' target column.\n",
    "ai4i_2020_predictive_maintenance_dataset = fetch_ucirepo(id=601)\n",
    "ID = ai4i_2020_predictive_maintenance_dataset.data.ids\n",
    "X = ai4i_2020_predictive_maintenance_dataset.data.features\n",
    "y = ai4i_2020_predictive_maintenance_dataset.data.targets['Machine failure']  \n",
    "\n",
    "# Per product quality level (prefix of 'Product ID'): sum of the labels as the\n",
    "# positive count, number of labels equal to 0 as the negative count.\n",
    "product_levels = ['L', 'M', 'H']\n",
    "labels_per_level = [y[ID['Product ID'].str.startswith(level)] for level in product_levels]\n",
    "positive_counts = [level_labels.sum() for level_labels in labels_per_level]\n",
    "negative_counts = [(level_labels == 0).sum() for level_labels in labels_per_level]\n",
    "\n",
    "# Report the class balance of every level.\n",
    "for i, level in enumerate(product_levels):\n",
    "    print(f\"Product quality level {level} - Positive count: {positive_counts[i]}, Negative count: {negative_counts[i]}\")\n",
    "\n",
    "# Column groups: 'Type' is treated as categorical, the remaining five as numeric.\n",
    "categorical_features = ['Type']\n",
    "numeric_features = ['Air temperature', 'Process temperature', 'Rotational speed', 'Torque', 'Tool wear']\n",
    "\n",
    "# Standardize the numeric columns.\n",
    "# NOTE(review): the scaler is fit on every row, i.e. before the train/test split\n",
    "# done in the next cell, so test rows contribute to the scaling statistics -\n",
    "# confirm this is intended.\n",
    "scaler = StandardScaler()\n",
    "X_numeric = scaler.fit_transform(X[numeric_features])\n",
    "\n",
    "# Replace each categorical column by the integer codes of its categories.\n",
    "X_categorical = X[categorical_features].apply(lambda col: col.astype('category').cat.codes).values\n",
    "\n",
    "# Feature matrix: scaled numeric columns followed by the category-code column(s).\n",
    "all_features = np.hstack((X_numeric, X_categorical))\n",
    "\n",
    "# Target values as a NumPy array.\n",
    "all_labels = y.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ea656b8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:41.808452Z",
     "start_time": "2024-11-19T03:48:41.799734Z"
    }
   },
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows as the test set; random_state=42 fixes the split.\n",
    "train_features, test_features, train_labels, test_labels = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)\n",
    "\n",
    "def random_undersample(X, y, random_state=None):\n",
    "    \"\"\"Balance the classes by randomly dropping samples of the larger classes.\n",
    "\n",
    "    Every class is cut down, by sampling without replacement, to the size of\n",
    "    the smallest class. The result is grouped by class in ascending label order.\n",
    "\n",
    "    Args:\n",
    "        X: Array-like with one row per sample.\n",
    "        y: Array-like with one class label per sample.\n",
    "        random_state: Optional seed (anything np.random.RandomState accepts)\n",
    "            for a reproducible selection. None, the default, keeps drawing\n",
    "            from NumPy's global random state exactly as before.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (X_resampled, y_resampled) of NumPy arrays.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If y contains no samples.\n",
    "    \"\"\"\n",
    "    # Accept lists / pandas objects as well as NumPy arrays.\n",
    "    X = np.asarray(X)\n",
    "    y = np.asarray(y)\n",
    "    if y.size == 0:\n",
    "        raise ValueError('random_undersample needs at least one sample')\n",
    "\n",
    "    # Global RNG by default (backward compatible), private seeded RNG on request.\n",
    "    rng = np.random if random_state is None else np.random.RandomState(random_state)\n",
    "\n",
    "    # Size of the smallest class decides how many samples every class keeps.\n",
    "    unique, counts = np.unique(y, return_counts=True)\n",
    "    min_count = np.min(counts)\n",
    "\n",
    "    X_resampled, y_resampled = [], []\n",
    "    for label in unique:\n",
    "        class_mask = y == label\n",
    "        X_class = X[class_mask]\n",
    "        y_class = y[class_mask]\n",
    "\n",
    "        # Same indices for features and labels keep the pairs aligned.\n",
    "        indices = rng.choice(len(X_class), min_count, replace=False)\n",
    "        X_resampled.append(X_class[indices])\n",
    "        y_resampled.append(y_class[indices])\n",
    "\n",
    "    X_resampled = np.vstack(X_resampled)\n",
    "    y_resampled = np.hstack(y_resampled)\n",
    "\n",
    "    return X_resampled, y_resampled\n",
    "\n",
    "# Seed NumPy's global RNG so the undersampled subset is identical on every run,\n",
    "# in line with the fixed random_state of the train/test split above.\n",
    "np.random.seed(42)\n",
    "train_features, train_labels = random_undersample(train_features, train_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01b3580c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:41.816123Z",
     "start_time": "2024-11-19T03:48:41.810518Z"
    }
   },
   "outputs": [],
   "source": [
    "# Display the shape of the training feature matrix after undersampling.\n",
    "train_features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "148db3bf",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:41.823505Z",
     "start_time": "2024-11-19T03:48:41.818176Z"
    }
   },
   "outputs": [],
   "source": [
    "# Tally the training labels; a class that does not occur is reported as 0.\n",
    "unique, counts = np.unique(train_labels, return_counts=True)\n",
    "count_by_label = dict(zip(unique, counts))\n",
    "positive_count = count_by_label.get(1, 0)\n",
    "negative_count = count_by_label.get(0, 0)\n",
    "print(f\"Training labels - Positive count: {positive_count}, Negative count: {negative_count}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a83e84bb",
   "metadata": {},
   "source": [
    "# Training parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b604340",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:41.828374Z",
     "start_time": "2024-11-19T03:48:41.825829Z"
    }
   },
   "outputs": [],
   "source": [
    "# Training parameters: samples per mini-batch, used when the train and test\n",
    "# tf.data pipelines are batched.\n",
    "batch_size = 32"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5e11e4f",
   "metadata": {},
   "source": [
    "# Network structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b70a3124",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:43.470535Z",
     "start_time": "2024-11-19T03:48:41.836185Z"
    }
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "# Shuffle individual samples *before* batching. The undersampled training set is\n",
    "# stacked class by class, so batching first and then shuffling whole batches would\n",
    "# leave almost every mini-batch with a single class. A buffer as large as the\n",
    "# training set gives a uniform shuffle, redrawn on every pass over the dataset.\n",
    "train_dataset = (\n",
    "    tf.data.Dataset.from_tensor_slices((train_features, train_labels))\n",
    "    .shuffle(buffer_size=len(train_features))\n",
    "    .batch(batch_size)\n",
    ")\n",
    "# The test set is only evaluated, so it is batched in its original order.\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels)).batch(batch_size)\n",
    "\n",
    "# Fully connected binary classifier: five hidden ReLU layers of 100 units each,\n",
    "# followed by a single sigmoid output unit.\n",
    "hidden_layers = [tf.keras.layers.Dense(100, activation='relu') for _ in range(5)]\n",
    "model = tf.keras.models.Sequential([\n",
    "    tf.keras.layers.InputLayer(input_shape=(all_features.shape[1],)),\n",
    "    *hidden_layers,\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid'),\n",
    "])\n",
    "\n",
    "# Binary cross-entropy loss, Adam with learning rate 1e-4, accuracy as metric.\n",
    "adam = tf.keras.optimizers.Adam(learning_rate=1e-4)\n",
    "model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])\n",
    "\n",
    "# Auxiliary model that returns the output of every layer of `model` whose name\n",
    "# does not contain 'dropout'.\n",
    "layer_outputs = [lyr.output for lyr in model.layers if 'dropout' not in lyr.name]\n",
    "activation_model = tf.keras.models.Model(inputs=model.input, outputs=layer_outputs)\n",
    "\n",
    "# Layer outputs for the test features; the network has not been fitted yet here.\n",
    "activations = activation_model.predict(test_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da1212c2",
   "metadata": {},
   "source": [
    "# Initialize record matrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a14e153",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:48:43.596754Z",
     "start_time": "2024-11-19T03:48:43.472630Z"
    }
   },
   "outputs": [],
   "source": [
    "# Number of recorded epochs and the spacing used for the x axis of the plots.\n",
    "num_epochs = 100\n",
    "batch_ep = 1\n",
    "x_plot = batch_ep * np.arange(num_epochs)\n",
    "\n",
    "# Number of leading layer outputs left out of the per-layer records.\n",
    "reserved_layers = 0\n",
    "\n",
    "# Run the first training batch through the activation model; below only the\n",
    "# number of returned layer outputs is used, to size the per-layer records.\n",
    "Separability_image, Separability_label = next(iter(train_dataset))\n",
    "Separability_label = tf.reshape(Separability_label, (-1, 1))\n",
    "x = activation_model.predict(Separability_image)\n",
    "\n",
    "# Per-layer records: one row per tracked layer, one column per epoch.\n",
    "per_layer_shape = (len(x) - reserved_layers, num_epochs)\n",
    "LS_star_squence = np.zeros(per_layer_shape)\n",
    "LS_0_squence = np.zeros(per_layer_shape)\n",
    "LS_1_squence = np.zeros(per_layer_shape)\n",
    "LS_2_squence = np.zeros(per_layer_shape)\n",
    "LDA_squence = np.zeros(per_layer_shape)\n",
    "\n",
    "# Baseline records: a single row, one column per epoch.\n",
    "baseline_shape = (1, num_epochs)\n",
    "LS_star_squence_base = np.zeros(baseline_shape)\n",
    "LS_0_squence_base = np.zeros(baseline_shape)\n",
    "LS_1_squence_base = np.zeros(baseline_shape)\n",
    "LS_2_squence_base = np.zeros(baseline_shape)\n",
    "LDA_squence_base = np.zeros(baseline_shape)\n",
    "\n",
    "# Loss and accuracy histories: one entry per epoch.\n",
    "train_loss_squence = np.zeros(num_epochs)\n",
    "train_accuracy_squence = np.zeros(num_epochs)\n",
    "test_loss_squence = np.zeros(num_epochs)\n",
    "test_accuracy_squence = np.zeros(num_epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5768ac45",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "c43169ac",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c447265f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:10.959326Z",
     "start_time": "2024-11-19T03:48:43.600166Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Baseline: the five values returned by W for the raw training features are\n",
    "# written into the whole single row of the corresponding baseline record.\n",
    "base_star, base_0, base_1, base_2, base_lda = W(tf.constant(train_features), train_labels.reshape(-1, 1))\n",
    "LS_star_squence_base[0, :] = base_star\n",
    "LS_0_squence_base[0, :] = base_0\n",
    "LS_1_squence_base[0, :] = base_1\n",
    "LS_2_squence_base[0, :] = base_2\n",
    "LDA_squence_base[0, :] = base_lda\n",
    "\n",
    "for iter_e in range(num_epochs):\n",
    "    # Fit one epoch at a time so the measures can be sampled between epochs.\n",
    "    model.fit(train_dataset, epochs=1, validation_data=test_dataset)\n",
    "\n",
    "    # Layer outputs for the complete training set after this epoch.\n",
    "    x = activation_model(train_features)\n",
    "\n",
    "    # Loss and accuracy on both datasets.\n",
    "    train_loss_squence[iter_e], train_accuracy_squence[iter_e] = model.evaluate(train_dataset)\n",
    "    test_loss_squence[iter_e], test_accuracy_squence[iter_e] = model.evaluate(test_dataset)\n",
    "\n",
    "    # Store the five values returned by W for every tracked layer output.\n",
    "    for layers_i, layer_activation in enumerate(x[reserved_layers:]):\n",
    "        ls_star, ls_0, ls_1, ls_2, lda = W(layer_activation, train_labels.reshape(-1, 1))\n",
    "        LS_star_squence[layers_i, iter_e] = ls_star\n",
    "        LS_0_squence[layers_i, iter_e] = ls_0\n",
    "        LS_1_squence[layers_i, iter_e] = ls_1\n",
    "        LS_2_squence[layers_i, iter_e] = ls_2\n",
    "        LDA_squence[layers_i, iter_e] = lda\n",
    "\n",
    "    print('**********'+'the',iter_e,'epochs has finished'+'**********')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3d147e4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:10.968955Z",
     "start_time": "2024-11-19T03:51:10.962862Z"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "import time\n",
    "# Ignore all Python warnings from here on (this cell sits after the training cell).\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51a8fcc4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:10.980748Z",
     "start_time": "2024-11-19T03:51:10.973116Z"
    }
   },
   "outputs": [],
   "source": [
    "# Layer names as returned by get_layer_name (not defined in this notebook -\n",
    "# presumably provided by the star import), minus the first `reserved_layers` entries.\n",
    "# NOTE(review): the next cell overwrites layer_name_list with hard-coded labels.\n",
    "layer_name_list = get_layer_name(model)[reserved_layers:]\n",
    "# Bare expression so the notebook displays the list.\n",
    "layer_name_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb0d2b4c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:12.460300Z",
     "start_time": "2024-11-19T03:51:10.984718Z"
    }
   },
   "outputs": [],
   "source": [
    "# Hand-written display labels, one per Dense layer of the network.\n",
    "layer_name_list = [\n",
    "    '1st_dense', '2nd_dense', '3rd_dense',\n",
    "    '4th_dense', '5th_dense', '6th_dense',\n",
    "]\n",
    "\n",
    "# Figure built from the per-layer records and their baselines.\n",
    "Separability_figure = plot_Separability_figure(\n",
    "    layer_name_list, x_plot,\n",
    "    LS_star_squence, LS_0_squence, LS_1_squence, LS_2_squence, LDA_squence,\n",
    "    LS_star_squence_base, LS_0_squence_base, LS_1_squence_base, LS_2_squence_base, LDA_squence_base,\n",
    ")\n",
    "\n",
    "# Figure built from the loss / accuracy histories of the train and test sets.\n",
    "net_figure = plot_net_figure(\n",
    "    layer_name_list, x_plot,\n",
    "    train_loss_squence, train_accuracy_squence,\n",
    "    test_loss_squence, test_accuracy_squence,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1366544",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:12.468216Z",
     "start_time": "2024-11-19T03:51:12.463134Z"
    }
   },
   "outputs": [],
   "source": [
    "# Collect the plot labels, x axis and every recorded series under its own name,\n",
    "# ready to be saved in one piece.\n",
    "info = {\n",
    "    'layer_name_list': layer_name_list,\n",
    "    'x_plot': x_plot,\n",
    "    'LS_star_squence': LS_star_squence,\n",
    "    'LS_0_squence': LS_0_squence,\n",
    "    'LS_1_squence': LS_1_squence,\n",
    "    'LS_2_squence': LS_2_squence,\n",
    "    'LDA_squence': LDA_squence,\n",
    "    'train_loss_squence': train_loss_squence,\n",
    "    'train_accuracy_squence': train_accuracy_squence,\n",
    "    'test_loss_squence': test_loss_squence,\n",
    "    'test_accuracy_squence': test_accuracy_squence,\n",
    "    'LS_star_squence_base': LS_star_squence_base,\n",
    "    'LS_0_squence_base': LS_0_squence_base,\n",
    "    'LS_1_squence_base': LS_1_squence_base,\n",
    "    'LS_2_squence_base': LS_2_squence_base,\n",
    "    'LDA_squence_base': LDA_squence_base,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36437a3f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-11-19T03:51:12.475948Z",
     "start_time": "2024-11-19T03:51:12.471235Z"
    }
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "def save_info(info, file_path):\n",
    "    \"\"\"Pickle `info` to `file_path`, creating missing parent directories.\n",
    "\n",
    "    Args:\n",
    "        info: Any picklable object.\n",
    "        file_path: Destination of the pickle file; an existing file is overwritten.\n",
    "    \"\"\"\n",
    "    parent_dir = os.path.dirname(file_path)\n",
    "    if parent_dir:\n",
    "        # Create the output folder so a missing directory does not abort the save.\n",
    "        os.makedirs(parent_dir, exist_ok=True)\n",
    "    with open(file_path, 'wb') as file:\n",
    "        pickle.dump(info, file)\n",
    "\n",
    "# Write the collected records to disk; the path is relative to the working directory.\n",
    "save_info(info, '../saved_data/Maintenance.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75e65634",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545a0ead",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95d37add",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow-lsm",
   "language": "python",
   "name": "tensorflow-lsm"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
