{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "313a22ee",
   "metadata": {
    "column": 1,
    "index": 1
   },
   "source": [
    "# Section 1: Imports, Functions, and Constants"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "af0d550b",
   "metadata": {
    "column": 1,
    "index": 2
   },
   "outputs": [],
   "source": [
    "# Library Import Statements\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import normalize\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "12c1b271",
   "metadata": {
    "column": 1,
    "index": 3
   },
   "outputs": [],
   "source": [
    "def euclidean_dist_calc(test, train):\n",
    "    \"\"\"\n",
    "    Compute the Euclidean distance between each\n",
    "    point in test and all points in train.\n",
    "    \"\"\"\n",
    "    num_test = test.shape[0]\n",
    "    num_train = train.shape[0]\n",
    "    dists = np.zeros((num_test, num_train)) \n",
    "    sum_squares_train = np.sum(np.square(train), axis=1)\n",
    "    sum_squares_test = np.sum(np.square(test), axis=1)\n",
    "    sum_combo = np.dot(test, train.T) * -2\n",
    "    dists = np.sqrt(((sum_combo + sum_squares_train).T + sum_squares_test).T)\n",
    "    return dists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fa827dcf",
   "metadata": {
    "column": 1,
    "index": 4
   },
   "outputs": [],
   "source": [
    "def manhattan_dist_calc(test, train):\n",
    "    \"\"\"\n",
    "    Compute the Manhattan distance between each\n",
    "    point in test and all points in train.\n",
    "    \"\"\"\n",
    "    num_test = test.shape[0]\n",
    "    num_train = train.shape[0]\n",
    "    dists = np.zeros((num_test, num_train)) \n",
    "    for i in range(num_test):\n",
    "        test_point = test[i]\n",
    "        result_matrix = train - test_point\n",
    "        result_matrix = np.absolute(result_matrix)\n",
    "        result_vector = np.sum(result_matrix, axis=1)\n",
    "        dists[i] = result_vector\n",
    "    return dists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "55df4a01",
   "metadata": {
    "column": 1,
    "index": 5
   },
   "outputs": [],
   "source": [
    "def compute_distances(test, train, metric):\n",
    "    \"\"\"\n",
    "    Computes distances according to either \n",
    "    Euclidean or Manhattan distance metrics given\n",
    "    a test set and a training set.\n",
    "    \n",
    "    The input metric must either be \"Euclidean\" or \n",
    "    \"Manhattan\".\n",
    "    \"\"\"\n",
    "    if metric == \"Euclidean\":\n",
    "        dists = euclidean_dist_calc(test, train)\n",
    "    elif metric == \"Manhattan\":\n",
    "        dists = manhattan_dist_calc(test, train)\n",
    "    else:\n",
    "        raise ValueError(\"Invalid value d% for metric; must be Euclidean or Manhattan as a string.\" % metric)\n",
    "    return dists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f0191ccd",
   "metadata": {
    "column": 1,
    "index": 6
   },
   "outputs": [],
   "source": [
    "k = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "205a4c01",
   "metadata": {
    "column": 1,
    "index": 7
   },
   "source": [
    "# Section 2: Make Train & Test Sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "64049f4b",
   "metadata": {
    "column": 1,
    "index": 8
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tau1</th>\n",
       "      <th>tau2</th>\n",
       "      <th>tau3</th>\n",
       "      <th>tau4</th>\n",
       "      <th>p1</th>\n",
       "      <th>p2</th>\n",
       "      <th>p3</th>\n",
       "      <th>p4</th>\n",
       "      <th>g1</th>\n",
       "      <th>g2</th>\n",
       "      <th>g3</th>\n",
       "      <th>g4</th>\n",
       "      <th>stab</th>\n",
       "      <th>stabf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5000</th>\n",
       "      <td>5.825547</td>\n",
       "      <td>6.153305</td>\n",
       "      <td>9.215683</td>\n",
       "      <td>2.644971</td>\n",
       "      <td>4.749492</td>\n",
       "      <td>-1.491278</td>\n",
       "      <td>-1.382048</td>\n",
       "      <td>-1.876166</td>\n",
       "      <td>0.528459</td>\n",
       "      <td>0.155706</td>\n",
       "      <td>0.156222</td>\n",
       "      <td>0.458131</td>\n",
       "      <td>-0.023089</td>\n",
       "      <td>stable</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5001</th>\n",
       "      <td>2.750088</td>\n",
       "      <td>8.767781</td>\n",
       "      <td>4.517367</td>\n",
       "      <td>1.373404</td>\n",
       "      <td>2.700511</td>\n",
       "      <td>-0.587310</td>\n",
       "      <td>-1.190554</td>\n",
       "      <td>-0.922647</td>\n",
       "      <td>0.471845</td>\n",
       "      <td>0.428823</td>\n",
       "      <td>0.596576</td>\n",
       "      <td>0.249577</td>\n",
       "      <td>-0.021277</td>\n",
       "      <td>stable</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5002</th>\n",
       "      <td>6.253534</td>\n",
       "      <td>6.625686</td>\n",
       "      <td>0.613047</td>\n",
       "      <td>7.550439</td>\n",
       "      <td>4.305593</td>\n",
       "      <td>-1.529390</td>\n",
       "      <td>-1.415248</td>\n",
       "      <td>-1.360955</td>\n",
       "      <td>0.376768</td>\n",
       "      <td>0.686419</td>\n",
       "      <td>0.598896</td>\n",
       "      <td>0.408225</td>\n",
       "      <td>0.009619</td>\n",
       "      <td>unstable</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5003</th>\n",
       "      <td>1.800725</td>\n",
       "      <td>1.185765</td>\n",
       "      <td>1.515843</td>\n",
       "      <td>8.576087</td>\n",
       "      <td>4.255226</td>\n",
       "      <td>-1.270079</td>\n",
       "      <td>-1.970055</td>\n",
       "      <td>-1.015092</td>\n",
       "      <td>0.341150</td>\n",
       "      <td>0.623442</td>\n",
       "      <td>0.247956</td>\n",
       "      <td>0.653949</td>\n",
       "      <td>-0.038621</td>\n",
       "      <td>stable</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5004</th>\n",
       "      <td>7.150430</td>\n",
       "      <td>4.837233</td>\n",
       "      <td>3.244408</td>\n",
       "      <td>2.089166</td>\n",
       "      <td>4.539624</td>\n",
       "      <td>-1.981831</td>\n",
       "      <td>-1.375972</td>\n",
       "      <td>-1.181821</td>\n",
       "      <td>0.319280</td>\n",
       "      <td>0.072775</td>\n",
       "      <td>0.842072</td>\n",
       "      <td>0.577839</td>\n",
       "      <td>-0.027978</td>\n",
       "      <td>stable</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          tau1      tau2      tau3      tau4        p1        p2        p3  \\\n",
       "5000  5.825547  6.153305  9.215683  2.644971  4.749492 -1.491278 -1.382048   \n",
       "5001  2.750088  8.767781  4.517367  1.373404  2.700511 -0.587310 -1.190554   \n",
       "5002  6.253534  6.625686  0.613047  7.550439  4.305593 -1.529390 -1.415248   \n",
       "5003  1.800725  1.185765  1.515843  8.576087  4.255226 -1.270079 -1.970055   \n",
       "5004  7.150430  4.837233  3.244408  2.089166  4.539624 -1.981831 -1.375972   \n",
       "\n",
       "            p4        g1        g2        g3        g4      stab     stabf  \n",
       "5000 -1.876166  0.528459  0.155706  0.156222  0.458131 -0.023089    stable  \n",
       "5001 -0.922647  0.471845  0.428823  0.596576  0.249577 -0.021277    stable  \n",
       "5002 -1.360955  0.376768  0.686419  0.598896  0.408225  0.009619  unstable  \n",
       "5003 -1.015092  0.341150  0.623442  0.247956  0.653949 -0.038621    stable  \n",
       "5004 -1.181821  0.319280  0.072775  0.842072  0.577839 -0.027978    stable  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Data Import Cell\n",
    "url = \"http://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv\"\n",
    "data = pd.read_csv(url, sep=\",\")\n",
    "data = data.tail(5000)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "23f822d5",
   "metadata": {
    "column": 1,
    "index": 9
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5000    0\n",
       "5001    0\n",
       "5002    1\n",
       "5003    0\n",
       "5004    0\n",
       "       ..\n",
       "9995    1\n",
       "9996    0\n",
       "9997    0\n",
       "9998    1\n",
       "9999    1\n",
       "Name: stabf, Length: 5000, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert Class labels to numbers\n",
    "# 0 = stable\n",
    "# 1 = unstable\n",
    "\n",
    "\n",
    "data.loc[(data['stabf'] == 'stable'), 'stabf'] = 0\n",
    "data.loc[(data['stabf'] == 'unstable'), 'stabf'] = 1\n",
    "data['stabf'] = pd.to_numeric(data['stabf'])\n",
    "data['stabf']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5e3de39a",
   "metadata": {
    "column": 1,
    "index": 10
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tau1</th>\n",
       "      <th>tau2</th>\n",
       "      <th>tau3</th>\n",
       "      <th>tau4</th>\n",
       "      <th>p1</th>\n",
       "      <th>p2</th>\n",
       "      <th>p3</th>\n",
       "      <th>p4</th>\n",
       "      <th>g1</th>\n",
       "      <th>g2</th>\n",
       "      <th>g3</th>\n",
       "      <th>g4</th>\n",
       "      <th>stab</th>\n",
       "      <th>stabf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8000</th>\n",
       "      <td>5.306905</td>\n",
       "      <td>2.452792</td>\n",
       "      <td>5.414825</td>\n",
       "      <td>4.964973</td>\n",
       "      <td>2.853706</td>\n",
       "      <td>-0.952709</td>\n",
       "      <td>-1.391630</td>\n",
       "      <td>-0.509366</td>\n",
       "      <td>0.160440</td>\n",
       "      <td>0.958669</td>\n",
       "      <td>0.867021</td>\n",
       "      <td>0.183520</td>\n",
       "      <td>0.026496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8001</th>\n",
       "      <td>7.745244</td>\n",
       "      <td>4.311693</td>\n",
       "      <td>7.893386</td>\n",
       "      <td>0.917804</td>\n",
       "      <td>3.457962</td>\n",
       "      <td>-1.425012</td>\n",
       "      <td>-1.291345</td>\n",
       "      <td>-0.741605</td>\n",
       "      <td>0.576198</td>\n",
       "      <td>0.851929</td>\n",
       "      <td>0.172012</td>\n",
       "      <td>0.788637</td>\n",
       "      <td>-0.004614</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8002</th>\n",
       "      <td>5.279096</td>\n",
       "      <td>5.458030</td>\n",
       "      <td>4.123039</td>\n",
       "      <td>9.860768</td>\n",
       "      <td>2.441075</td>\n",
       "      <td>-1.028213</td>\n",
       "      <td>-0.790346</td>\n",
       "      <td>-0.622516</td>\n",
       "      <td>0.462567</td>\n",
       "      <td>0.287156</td>\n",
       "      <td>0.252946</td>\n",
       "      <td>0.663104</td>\n",
       "      <td>0.021423</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>1.596736</td>\n",
       "      <td>5.744044</td>\n",
       "      <td>5.888295</td>\n",
       "      <td>2.434392</td>\n",
       "      <td>3.762364</td>\n",
       "      <td>-1.019351</td>\n",
       "      <td>-0.869846</td>\n",
       "      <td>-1.873167</td>\n",
       "      <td>0.769447</td>\n",
       "      <td>0.733218</td>\n",
       "      <td>0.096916</td>\n",
       "      <td>0.159050</td>\n",
       "      <td>-0.033336</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8004</th>\n",
       "      <td>7.214450</td>\n",
       "      <td>2.265906</td>\n",
       "      <td>3.889059</td>\n",
       "      <td>9.012663</td>\n",
       "      <td>3.675266</td>\n",
       "      <td>-0.722557</td>\n",
       "      <td>-1.249246</td>\n",
       "      <td>-1.703463</td>\n",
       "      <td>0.837425</td>\n",
       "      <td>0.599088</td>\n",
       "      <td>0.975245</td>\n",
       "      <td>0.772472</td>\n",
       "      <td>0.061052</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>2.930406</td>\n",
       "      <td>9.487627</td>\n",
       "      <td>2.376523</td>\n",
       "      <td>6.187797</td>\n",
       "      <td>3.343416</td>\n",
       "      <td>-0.658054</td>\n",
       "      <td>-1.449106</td>\n",
       "      <td>-1.236256</td>\n",
       "      <td>0.601709</td>\n",
       "      <td>0.779642</td>\n",
       "      <td>0.813512</td>\n",
       "      <td>0.608385</td>\n",
       "      <td>0.023892</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>3.392299</td>\n",
       "      <td>1.274827</td>\n",
       "      <td>2.954947</td>\n",
       "      <td>6.894759</td>\n",
       "      <td>4.349512</td>\n",
       "      <td>-1.663661</td>\n",
       "      <td>-0.952437</td>\n",
       "      <td>-1.733414</td>\n",
       "      <td>0.502079</td>\n",
       "      <td>0.567242</td>\n",
       "      <td>0.285880</td>\n",
       "      <td>0.366120</td>\n",
       "      <td>-0.025803</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>2.364034</td>\n",
       "      <td>2.842030</td>\n",
       "      <td>8.776391</td>\n",
       "      <td>1.008906</td>\n",
       "      <td>4.299976</td>\n",
       "      <td>-1.380719</td>\n",
       "      <td>-0.943884</td>\n",
       "      <td>-1.975373</td>\n",
       "      <td>0.487838</td>\n",
       "      <td>0.986505</td>\n",
       "      <td>0.149286</td>\n",
       "      <td>0.145984</td>\n",
       "      <td>-0.031810</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>9.631511</td>\n",
       "      <td>3.994398</td>\n",
       "      <td>2.757071</td>\n",
       "      <td>7.821347</td>\n",
       "      <td>2.514755</td>\n",
       "      <td>-0.966330</td>\n",
       "      <td>-0.649915</td>\n",
       "      <td>-0.898510</td>\n",
       "      <td>0.365246</td>\n",
       "      <td>0.587558</td>\n",
       "      <td>0.889118</td>\n",
       "      <td>0.818391</td>\n",
       "      <td>0.037789</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>6.530527</td>\n",
       "      <td>6.781790</td>\n",
       "      <td>4.349695</td>\n",
       "      <td>8.673138</td>\n",
       "      <td>3.492807</td>\n",
       "      <td>-1.390285</td>\n",
       "      <td>-1.532193</td>\n",
       "      <td>-0.570329</td>\n",
       "      <td>0.073056</td>\n",
       "      <td>0.505441</td>\n",
       "      <td>0.378761</td>\n",
       "      <td>0.942631</td>\n",
       "      <td>0.045263</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          tau1      tau2      tau3      tau4        p1        p2        p3  \\\n",
       "8000  5.306905  2.452792  5.414825  4.964973  2.853706 -0.952709 -1.391630   \n",
       "8001  7.745244  4.311693  7.893386  0.917804  3.457962 -1.425012 -1.291345   \n",
       "8002  5.279096  5.458030  4.123039  9.860768  2.441075 -1.028213 -0.790346   \n",
       "8003  1.596736  5.744044  5.888295  2.434392  3.762364 -1.019351 -0.869846   \n",
       "8004  7.214450  2.265906  3.889059  9.012663  3.675266 -0.722557 -1.249246   \n",
       "...        ...       ...       ...       ...       ...       ...       ...   \n",
       "9995  2.930406  9.487627  2.376523  6.187797  3.343416 -0.658054 -1.449106   \n",
       "9996  3.392299  1.274827  2.954947  6.894759  4.349512 -1.663661 -0.952437   \n",
       "9997  2.364034  2.842030  8.776391  1.008906  4.299976 -1.380719 -0.943884   \n",
       "9998  9.631511  3.994398  2.757071  7.821347  2.514755 -0.966330 -0.649915   \n",
       "9999  6.530527  6.781790  4.349695  8.673138  3.492807 -1.390285 -1.532193   \n",
       "\n",
       "            p4        g1        g2        g3        g4      stab  stabf  \n",
       "8000 -0.509366  0.160440  0.958669  0.867021  0.183520  0.026496      1  \n",
       "8001 -0.741605  0.576198  0.851929  0.172012  0.788637 -0.004614      0  \n",
       "8002 -0.622516  0.462567  0.287156  0.252946  0.663104  0.021423      1  \n",
       "8003 -1.873167  0.769447  0.733218  0.096916  0.159050 -0.033336      0  \n",
       "8004 -1.703463  0.837425  0.599088  0.975245  0.772472  0.061052      1  \n",
       "...        ...       ...       ...       ...       ...       ...    ...  \n",
       "9995 -1.236256  0.601709  0.779642  0.813512  0.608385  0.023892      1  \n",
       "9996 -1.733414  0.502079  0.567242  0.285880  0.366120 -0.025803      0  \n",
       "9997 -1.975373  0.487838  0.986505  0.149286  0.145984 -0.031810      0  \n",
       "9998 -0.898510  0.365246  0.587558  0.889118  0.818391  0.037789      1  \n",
       "9999 -0.570329  0.073056  0.505441  0.378761  0.942631  0.045263      1  \n",
       "\n",
       "[2000 rows x 14 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split dataframe into testing and training sets\n",
    "train_data = data.iloc[:3000]\n",
    "test_data = data.iloc[3000:]\n",
    "test_data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6fc18e4",
   "metadata": {
    "column": 1,
    "index": 11
   },
   "source": [
    "# Section 3: Split Labels from Train & Test Sets, Numpyize Them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fcd91dbc",
   "metadata": {
    "column": 1,
    "index": 12
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 0, 1, ..., 0, 1, 1], dtype=int64)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save labels for train and test sets\n",
    "train_labels = train_data['stabf'].to_numpy()\n",
    "test_labels = test_data['stabf'].to_numpy()\n",
    "test_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "96579df2",
   "metadata": {
    "column": 1,
    "index": 13
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tau1</th>\n",
       "      <th>tau2</th>\n",
       "      <th>tau3</th>\n",
       "      <th>tau4</th>\n",
       "      <th>p1</th>\n",
       "      <th>p2</th>\n",
       "      <th>p3</th>\n",
       "      <th>p4</th>\n",
       "      <th>g1</th>\n",
       "      <th>g2</th>\n",
       "      <th>g3</th>\n",
       "      <th>g4</th>\n",
       "      <th>stab</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8000</th>\n",
       "      <td>5.306905</td>\n",
       "      <td>2.452792</td>\n",
       "      <td>5.414825</td>\n",
       "      <td>4.964973</td>\n",
       "      <td>2.853706</td>\n",
       "      <td>-0.952709</td>\n",
       "      <td>-1.391630</td>\n",
       "      <td>-0.509366</td>\n",
       "      <td>0.160440</td>\n",
       "      <td>0.958669</td>\n",
       "      <td>0.867021</td>\n",
       "      <td>0.183520</td>\n",
       "      <td>0.026496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8001</th>\n",
       "      <td>7.745244</td>\n",
       "      <td>4.311693</td>\n",
       "      <td>7.893386</td>\n",
       "      <td>0.917804</td>\n",
       "      <td>3.457962</td>\n",
       "      <td>-1.425012</td>\n",
       "      <td>-1.291345</td>\n",
       "      <td>-0.741605</td>\n",
       "      <td>0.576198</td>\n",
       "      <td>0.851929</td>\n",
       "      <td>0.172012</td>\n",
       "      <td>0.788637</td>\n",
       "      <td>-0.004614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8002</th>\n",
       "      <td>5.279096</td>\n",
       "      <td>5.458030</td>\n",
       "      <td>4.123039</td>\n",
       "      <td>9.860768</td>\n",
       "      <td>2.441075</td>\n",
       "      <td>-1.028213</td>\n",
       "      <td>-0.790346</td>\n",
       "      <td>-0.622516</td>\n",
       "      <td>0.462567</td>\n",
       "      <td>0.287156</td>\n",
       "      <td>0.252946</td>\n",
       "      <td>0.663104</td>\n",
       "      <td>0.021423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>1.596736</td>\n",
       "      <td>5.744044</td>\n",
       "      <td>5.888295</td>\n",
       "      <td>2.434392</td>\n",
       "      <td>3.762364</td>\n",
       "      <td>-1.019351</td>\n",
       "      <td>-0.869846</td>\n",
       "      <td>-1.873167</td>\n",
       "      <td>0.769447</td>\n",
       "      <td>0.733218</td>\n",
       "      <td>0.096916</td>\n",
       "      <td>0.159050</td>\n",
       "      <td>-0.033336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8004</th>\n",
       "      <td>7.214450</td>\n",
       "      <td>2.265906</td>\n",
       "      <td>3.889059</td>\n",
       "      <td>9.012663</td>\n",
       "      <td>3.675266</td>\n",
       "      <td>-0.722557</td>\n",
       "      <td>-1.249246</td>\n",
       "      <td>-1.703463</td>\n",
       "      <td>0.837425</td>\n",
       "      <td>0.599088</td>\n",
       "      <td>0.975245</td>\n",
       "      <td>0.772472</td>\n",
       "      <td>0.061052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>2.930406</td>\n",
       "      <td>9.487627</td>\n",
       "      <td>2.376523</td>\n",
       "      <td>6.187797</td>\n",
       "      <td>3.343416</td>\n",
       "      <td>-0.658054</td>\n",
       "      <td>-1.449106</td>\n",
       "      <td>-1.236256</td>\n",
       "      <td>0.601709</td>\n",
       "      <td>0.779642</td>\n",
       "      <td>0.813512</td>\n",
       "      <td>0.608385</td>\n",
       "      <td>0.023892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>3.392299</td>\n",
       "      <td>1.274827</td>\n",
       "      <td>2.954947</td>\n",
       "      <td>6.894759</td>\n",
       "      <td>4.349512</td>\n",
       "      <td>-1.663661</td>\n",
       "      <td>-0.952437</td>\n",
       "      <td>-1.733414</td>\n",
       "      <td>0.502079</td>\n",
       "      <td>0.567242</td>\n",
       "      <td>0.285880</td>\n",
       "      <td>0.366120</td>\n",
       "      <td>-0.025803</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>2.364034</td>\n",
       "      <td>2.842030</td>\n",
       "      <td>8.776391</td>\n",
       "      <td>1.008906</td>\n",
       "      <td>4.299976</td>\n",
       "      <td>-1.380719</td>\n",
       "      <td>-0.943884</td>\n",
       "      <td>-1.975373</td>\n",
       "      <td>0.487838</td>\n",
       "      <td>0.986505</td>\n",
       "      <td>0.149286</td>\n",
       "      <td>0.145984</td>\n",
       "      <td>-0.031810</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>9.631511</td>\n",
       "      <td>3.994398</td>\n",
       "      <td>2.757071</td>\n",
       "      <td>7.821347</td>\n",
       "      <td>2.514755</td>\n",
       "      <td>-0.966330</td>\n",
       "      <td>-0.649915</td>\n",
       "      <td>-0.898510</td>\n",
       "      <td>0.365246</td>\n",
       "      <td>0.587558</td>\n",
       "      <td>0.889118</td>\n",
       "      <td>0.818391</td>\n",
       "      <td>0.037789</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>6.530527</td>\n",
       "      <td>6.781790</td>\n",
       "      <td>4.349695</td>\n",
       "      <td>8.673138</td>\n",
       "      <td>3.492807</td>\n",
       "      <td>-1.390285</td>\n",
       "      <td>-1.532193</td>\n",
       "      <td>-0.570329</td>\n",
       "      <td>0.073056</td>\n",
       "      <td>0.505441</td>\n",
       "      <td>0.378761</td>\n",
       "      <td>0.942631</td>\n",
       "      <td>0.045263</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          tau1      tau2      tau3      tau4        p1        p2        p3  \\\n",
       "8000  5.306905  2.452792  5.414825  4.964973  2.853706 -0.952709 -1.391630   \n",
       "8001  7.745244  4.311693  7.893386  0.917804  3.457962 -1.425012 -1.291345   \n",
       "8002  5.279096  5.458030  4.123039  9.860768  2.441075 -1.028213 -0.790346   \n",
       "8003  1.596736  5.744044  5.888295  2.434392  3.762364 -1.019351 -0.869846   \n",
       "8004  7.214450  2.265906  3.889059  9.012663  3.675266 -0.722557 -1.249246   \n",
       "...        ...       ...       ...       ...       ...       ...       ...   \n",
       "9995  2.930406  9.487627  2.376523  6.187797  3.343416 -0.658054 -1.449106   \n",
       "9996  3.392299  1.274827  2.954947  6.894759  4.349512 -1.663661 -0.952437   \n",
       "9997  2.364034  2.842030  8.776391  1.008906  4.299976 -1.380719 -0.943884   \n",
       "9998  9.631511  3.994398  2.757071  7.821347  2.514755 -0.966330 -0.649915   \n",
       "9999  6.530527  6.781790  4.349695  8.673138  3.492807 -1.390285 -1.532193   \n",
       "\n",
       "            p4        g1        g2        g3        g4      stab  \n",
       "8000 -0.509366  0.160440  0.958669  0.867021  0.183520  0.026496  \n",
       "8001 -0.741605  0.576198  0.851929  0.172012  0.788637 -0.004614  \n",
       "8002 -0.622516  0.462567  0.287156  0.252946  0.663104  0.021423  \n",
       "8003 -1.873167  0.769447  0.733218  0.096916  0.159050 -0.033336  \n",
       "8004 -1.703463  0.837425  0.599088  0.975245  0.772472  0.061052  \n",
       "...        ...       ...       ...       ...       ...       ...  \n",
       "9995 -1.236256  0.601709  0.779642  0.813512  0.608385  0.023892  \n",
       "9996 -1.733414  0.502079  0.567242  0.285880  0.366120 -0.025803  \n",
       "9997 -1.975373  0.487838  0.986505  0.149286  0.145984 -0.031810  \n",
       "9998 -0.898510  0.365246  0.587558  0.889118  0.818391  0.037789  \n",
       "9999 -0.570329  0.073056  0.505441  0.378761  0.942631  0.045263  \n",
       "\n",
       "[2000 rows x 13 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop class column from train and test sets\n",
    "train = train_data.drop('stabf', axis=1)\n",
    "test = test_data.drop('stabf', axis=1)\n",
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "46d8d990",
   "metadata": {
    "column": 1,
    "index": 14
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 5.30690458e+00,  2.45279249e+00,  5.41482459e+00, ...,\n",
       "         8.67021410e-01,  1.83519882e-01,  2.64962906e-02],\n",
       "       [ 7.74524391e+00,  4.31169345e+00,  7.89338602e+00, ...,\n",
       "         1.72011877e-01,  7.88636816e-01, -4.61427241e-03],\n",
       "       [ 5.27909591e+00,  5.45802967e+00,  4.12303948e+00, ...,\n",
       "         2.52945554e-01,  6.63103928e-01,  2.14226453e-02],\n",
       "       ...,\n",
       "       [ 2.36403419e+00,  2.84203025e+00,  8.77639096e+00, ...,\n",
       "         1.49286458e-01,  1.45984032e-01, -3.18098881e-02],\n",
       "       [ 9.63151069e+00,  3.99439760e+00,  2.75707093e+00, ...,\n",
       "         8.89118346e-01,  8.18391326e-01,  3.77888091e-02],\n",
       "       [ 6.53052662e+00,  6.78178990e+00,  4.34969522e+00, ...,\n",
       "         3.78760930e-01,  9.42630833e-01,  4.52633082e-02]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert train and test to numpy arrays\n",
    "train = train.to_numpy()\n",
    "test = test.to_numpy()\n",
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2e018e3f",
   "metadata": {
    "column": 1,
    "index": 15
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.01997457,  0.00929616,  0.02046053, ...,  0.03290911,\n",
       "         0.00680405,  0.01478897],\n",
       "       [ 0.0291522 ,  0.01634145,  0.02982605, ...,  0.00652897,\n",
       "         0.02923891, -0.00257547],\n",
       "       [ 0.01986991,  0.0206861 ,  0.01557937, ...,  0.00960093,\n",
       "         0.02458474,  0.0119571 ],\n",
       "       ...,\n",
       "       [ 0.00889795,  0.01077138,  0.03316259, ...,  0.00566639,\n",
       "         0.00541239, -0.01775477],\n",
       "       [ 0.03625189,  0.01513889,  0.0104179 , ...,  0.03374783,\n",
       "         0.03034206,  0.02109192],\n",
       "       [ 0.02458015,  0.02570319,  0.01643582, ...,  0.01437644,\n",
       "         0.03494827,  0.02526383]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# normalize the datasets\n",
    "train = normalize(train, axis=0)\n",
    "test = normalize(test, axis=0)\n",
    "test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3d2e3aab",
   "metadata": {
    "column": 1,
    "index": 16
   },
   "source": [
    "# Section 4: Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "5ccce276",
   "metadata": {
    "column": 1,
    "index": 17
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.05656039, 0.04742277, 0.03528381, ..., 0.06292082, 0.0555769 ,\n",
       "        0.0355984 ],\n",
       "       [0.03771883, 0.04687068, 0.0450504 , ..., 0.05494742, 0.04898223,\n",
       "        0.04444321],\n",
       "       [0.04538253, 0.04727916, 0.03121922, ..., 0.03755693, 0.05127272,\n",
       "        0.04494746],\n",
       "       ...,\n",
       "       [0.0379053 , 0.04752331, 0.05298324, ..., 0.05169922, 0.048019  ,\n",
       "        0.04054628],\n",
       "       [0.06163712, 0.05883515, 0.0375313 , ..., 0.0623689 , 0.05840329,\n",
       "        0.04991057],\n",
       "       [0.0580277 , 0.06039776, 0.03919261, ..., 0.05632557, 0.06536506,\n",
       "        0.05633403]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compute Distance Matrix\n",
    "\n",
    "dists = compute_distances(test, train, \"Euclidean\")\n",
    "#dists = compute_distances(test, train, \"Manhattan\")\n",
    "dists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0e27eaef",
   "metadata": {
    "column": 1,
    "index": 18
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1., 1., 1., ..., 0., 1., 1.])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Make Predictions\n",
    "\n",
    "num_test = dists.shape[0]\n",
    "y_pred = np.zeros(num_test)\n",
    "for i in range(num_test):\n",
    "    nearest_neighbors_indices = np.argsort(dists[i])\n",
    "    closest_y = np.zeros(k)\n",
    "    for m in range(k):\n",
    "        closest_y[m] = train_labels[nearest_neighbors_indices[m]]\n",
    "    labels = []\n",
    "    label_counts = []\n",
    "    for label in closest_y:\n",
    "        if label not in labels:\n",
    "          labels.append(label)\n",
    "          label_counts.append(1)\n",
    "        else:\n",
    "          label_counts[labels.index(label)] = label_counts[labels.index(label)] + 1\n",
    "    mode_labels = np.argsort(label_counts)[::-1]\n",
    "    y_pred[i] = labels[mode_labels[0]]\n",
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0789e542",
   "metadata": {
    "column": 1,
    "index": 19
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Got 1868 / 2000 correct => accuracy: 0.934000\n"
     ]
    }
   ],
   "source": [
    "# Compute and print the fraction of correctly predicted examples\n",
    "num_correct = np.sum(y_pred == test_labels)\n",
    "accuracy = float(num_correct) / num_test\n",
    "print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f1a8794",
   "metadata": {
    "column": 1,
    "index": 20
   },
   "source": [
    "# Section 5: Reminders and Task Description"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae79576c",
   "metadata": {
    "column": 1,
    "index": 21
   },
   "source": [
    "## DOs\n",
    "- DO Run the k-value cell in Section 1 and the cells in Section 4 individually to test each parameter set (k-value and distance metric).\n",
    "- DO Edit the cell for k-value in Section 1.\n",
    "- DO Edit the cell that calculates the distances using one of two distance metrics in Section 4.\n",
    "- DO Take Notes on paper."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb7cf002",
   "metadata": {
    "column": 1,
    "index": 22
   },
   "source": [
    "## DO NOTs\n",
    "- DO NOT Run All.\n",
    "- DO NOT Edit any cells other than the two specified.\n",
    "- DO NOT Move cells.\n",
    "- DO NOT Delete cells.\n",
    "- DO NOT Add cells.\n",
    "- DO NOT Take Notes in the notebook."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b5b186b",
   "metadata": {
    "column": 1,
    "index": 23
   },
   "source": [
    "## Task Description Reminder\n",
    "You are tasked with tuning the parameters for k-value and distance metric to find the best k-value for each distance metric and then to determine which distance metric, with its optimal k-value, results in the highest accuracy (most accurate model). To do this, you will have to evaluate each combination."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b8df66a2",
   "metadata": {
    "column": 1,
    "index": 24
   },
   "source": [
    "## Lorem Ipsum\n",
    "Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit amet consectetur adipiscing velit, sed quia non numquam do eius modi tempora incididunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrumd exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? DQuis autem vel eum irure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur? At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7448b300",
   "metadata": {
    "column": 1,
    "index": 25
   },
   "source": [
    "# Irrelevant Text\n",
    "This is more irrelevant text padding the notebook. Don't read this any further. It's pointless. Why are you still reading this? Focus on the task at hand."
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
