{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Author: ML Tlachac, WPI\n",
    "#For StudentSADD, 2021\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "from sklearn import preprocessing\n",
    "from scipy import stats\n",
    "import collections\n",
    "import operator\n",
    "import argparse\n",
    "import random\n",
    "from sklearn.model_selection import cross_validate\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import recall_score\n",
    "from sklearn import metrics\n",
    "from statistics import mean \n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.utils import resample\n",
    "from sklearn import preprocessing\n",
    "from sklearn import utils\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn import svm\n",
    "import xgboost as xgb\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "import random\n",
    "from sklearn.decomposition import PCA, KernelPCA, NMF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(115, 2271)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>F0final_sma_f0v_duration</th>\n",
       "      <th>audspec_lengthL1norm_sma_quartile1</th>\n",
       "      <th>audspec_lengthL1norm_sma_quartile2</th>\n",
       "      <th>audspec_lengthL1norm_sma_quartile3</th>\n",
       "      <th>audspec_lengthL1norm_sma_iqr1-2</th>\n",
       "      <th>audspec_lengthL1norm_sma_iqr2-3</th>\n",
       "      <th>audspec_lengthL1norm_sma_iqr1-3</th>\n",
       "      <th>audspec_lengthL1norm_sma_percentile1.0</th>\n",
       "      <th>audspec_lengthL1norm_sma_percentile99.0</th>\n",
       "      <th>...</th>\n",
       "      <th>F0final_sma_f0v_meanSegLen</th>\n",
       "      <th>F0final_sma_f0v_maxSegLen</th>\n",
       "      <th>F0final_sma_f0v_minSegLen</th>\n",
       "      <th>F0final_sma_f0v_segLenStddev</th>\n",
       "      <th>F0final_sma_f0p_meanSegLen</th>\n",
       "      <th>F0final_sma_f0p_maxSegLen</th>\n",
       "      <th>F0final_sma_f0p_minSegLen</th>\n",
       "      <th>F0final_sma_f0p_segLenStddev</th>\n",
       "      <th>label</th>\n",
       "      <th>label_q9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8181</td>\n",
       "      <td>8.66</td>\n",
       "      <td>0.035832</td>\n",
       "      <td>0.059170</td>\n",
       "      <td>0.196654</td>\n",
       "      <td>0.023339</td>\n",
       "      <td>0.137484</td>\n",
       "      <td>0.160822</td>\n",
       "      <td>0.000999</td>\n",
       "      <td>0.508319</td>\n",
       "      <td>...</td>\n",
       "      <td>0.277000</td>\n",
       "      <td>0.71</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.212181</td>\n",
       "      <td>0.516364</td>\n",
       "      <td>2.25</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.634311</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8170</td>\n",
       "      <td>4.96</td>\n",
       "      <td>0.017266</td>\n",
       "      <td>0.064905</td>\n",
       "      <td>0.112320</td>\n",
       "      <td>0.047639</td>\n",
       "      <td>0.047415</td>\n",
       "      <td>0.095054</td>\n",
       "      <td>0.011481</td>\n",
       "      <td>0.216508</td>\n",
       "      <td>...</td>\n",
       "      <td>0.190000</td>\n",
       "      <td>0.35</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.098894</td>\n",
       "      <td>0.284000</td>\n",
       "      <td>0.91</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.294591</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4041</td>\n",
       "      <td>6.60</td>\n",
       "      <td>0.056151</td>\n",
       "      <td>0.107810</td>\n",
       "      <td>0.248682</td>\n",
       "      <td>0.051659</td>\n",
       "      <td>0.140872</td>\n",
       "      <td>0.192531</td>\n",
       "      <td>0.018469</td>\n",
       "      <td>0.574875</td>\n",
       "      <td>...</td>\n",
       "      <td>0.179167</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.147505</td>\n",
       "      <td>0.323077</td>\n",
       "      <td>1.92</td>\n",
       "      <td>0.03</td>\n",
       "      <td>0.541215</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8650</td>\n",
       "      <td>6.26</td>\n",
       "      <td>0.106118</td>\n",
       "      <td>0.271905</td>\n",
       "      <td>0.470733</td>\n",
       "      <td>0.165787</td>\n",
       "      <td>0.198828</td>\n",
       "      <td>0.364615</td>\n",
       "      <td>0.000999</td>\n",
       "      <td>1.408070</td>\n",
       "      <td>...</td>\n",
       "      <td>0.210714</td>\n",
       "      <td>0.64</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.155768</td>\n",
       "      <td>0.201333</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0.02</td>\n",
       "      <td>0.212849</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4782</td>\n",
       "      <td>6.46</td>\n",
       "      <td>0.038199</td>\n",
       "      <td>0.069191</td>\n",
       "      <td>0.183853</td>\n",
       "      <td>0.030992</td>\n",
       "      <td>0.114661</td>\n",
       "      <td>0.145653</td>\n",
       "      <td>0.017118</td>\n",
       "      <td>0.462369</td>\n",
       "      <td>...</td>\n",
       "      <td>0.155000</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.105396</td>\n",
       "      <td>0.334615</td>\n",
       "      <td>1.98</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.525424</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 2271 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     id  F0final_sma_f0v_duration  audspec_lengthL1norm_sma_quartile1  \\\n",
       "0  8181                      8.66                            0.035832   \n",
       "1  8170                      4.96                            0.017266   \n",
       "2  4041                      6.60                            0.056151   \n",
       "3  8650                      6.26                            0.106118   \n",
       "4  4782                      6.46                            0.038199   \n",
       "\n",
       "   audspec_lengthL1norm_sma_quartile2  audspec_lengthL1norm_sma_quartile3  \\\n",
       "0                            0.059170                            0.196654   \n",
       "1                            0.064905                            0.112320   \n",
       "2                            0.107810                            0.248682   \n",
       "3                            0.271905                            0.470733   \n",
       "4                            0.069191                            0.183853   \n",
       "\n",
       "   audspec_lengthL1norm_sma_iqr1-2  audspec_lengthL1norm_sma_iqr2-3  \\\n",
       "0                         0.023339                         0.137484   \n",
       "1                         0.047639                         0.047415   \n",
       "2                         0.051659                         0.140872   \n",
       "3                         0.165787                         0.198828   \n",
       "4                         0.030992                         0.114661   \n",
       "\n",
       "   audspec_lengthL1norm_sma_iqr1-3  audspec_lengthL1norm_sma_percentile1.0  \\\n",
       "0                         0.160822                                0.000999   \n",
       "1                         0.095054                                0.011481   \n",
       "2                         0.192531                                0.018469   \n",
       "3                         0.364615                                0.000999   \n",
       "4                         0.145653                                0.017118   \n",
       "\n",
       "   audspec_lengthL1norm_sma_percentile99.0  ...  F0final_sma_f0v_meanSegLen  \\\n",
       "0                                 0.508319  ...                    0.277000   \n",
       "1                                 0.216508  ...                    0.190000   \n",
       "2                                 0.574875  ...                    0.179167   \n",
       "3                                 1.408070  ...                    0.210714   \n",
       "4                                 0.462369  ...                    0.155000   \n",
       "\n",
       "   F0final_sma_f0v_maxSegLen  F0final_sma_f0v_minSegLen  \\\n",
       "0                       0.71                       0.09   \n",
       "1                       0.35                       0.06   \n",
       "2                       0.50                       0.04   \n",
       "3                       0.64                       0.02   \n",
       "4                       0.47                       0.05   \n",
       "\n",
       "   F0final_sma_f0v_segLenStddev  F0final_sma_f0p_meanSegLen  \\\n",
       "0                      0.212181                    0.516364   \n",
       "1                      0.098894                    0.284000   \n",
       "2                      0.147505                    0.323077   \n",
       "3                      0.155768                    0.201333   \n",
       "4                      0.105396                    0.334615   \n",
       "\n",
       "   F0final_sma_f0p_maxSegLen  F0final_sma_f0p_minSegLen  \\\n",
       "0                       2.25                       0.02   \n",
       "1                       0.91                       0.06   \n",
       "2                       1.92                       0.03   \n",
       "3                       0.73                       0.02   \n",
       "4                       1.98                       0.06   \n",
       "\n",
       "   F0final_sma_f0p_segLenStddev  label  label_q9  \n",
       "0                      0.634311      0         0  \n",
       "1                      0.294591      0         0  \n",
       "2                      0.541215      1         0  \n",
       "3                      0.212849      1         0  \n",
       "4                      0.525424      1         0  \n",
       "\n",
       "[5 rows x 2271 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the feature table. Per the displayed head, `id` is the first column and\n",
    "# `label` / `label_q9` are the last two columns.\n",
    "DATA_PATH = \"scriptedML.csv\"\n",
    "data = pd.read_csv(DATA_PATH)\n",
    "print(data.shape)\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ids assigned to the test partition: rows of `data` whose `id` is in this list become `testdata`.\n",
    "testids = [1607712777, 292, 2613, 1610640355, 1607494599, 1607040811, 1608492986, 6390, 396, 1607734901, 1607350992, 1608992344, 1609903202, 74, 7159, 4698, 7547, 4441, 1607097951, 8479, 8170, 4707, 7516, 1609174124, 1608853150, 8516, 1611424664, 2843, 1607040596, 1953, 1607772081, 1608564004, 2627, 1607217921, 1607118643, 1607314413, 1609887404, 1608335387, 4098, 1607046006, 1608242917, 8918, 1607131299, 9754, 1607262842, 1607273026, 2478, 1607536408, 1607291545, 1608707232, 1609941585, 1608200497, 1610630377, 7711, 1607810287, 9934, 1608850448, 4041, 1609166629, 1608168856, 1607572897, 6831, 1608586814, 1608588581, 2837, 8180, 1608631410, 1607051003, 3830, 4879, 1608920128, 1607019351, 8181, 3473, 1608335906, 1607738757, 1608770486, 7564, 1607495239, 1609983150, 1607397061, 1607696074, 103, 2222]\n",
    "# Ids assigned to the training partition: rows of `data` whose `id` is in this list become `traindata`.\n",
    "trainids = [4769, 1607928177, 1607269923, 7755, 4598, 1607807806, 1608741452, 3323, 1610110670, 1607133044, 9745, 1607291670, 5245, 4442, 319, 1607133218, 1607010270, 1608587203, 1609256130, 1608582258, 5028, 1609771771, 5229, 3517, 1608595561, 1608048050, 1607410780, 528, 1607134906, 3102, 1607555727, 1609887167, 3985, 7256, 3523, 1607289708, 1609890222, 850, 1608917024, 5047, 1608061691, 4782, 1608062276, 1056, 1611517276, 1607636681, 1607891972, 5571, 1609052616, 1607927243, 2525, 4353, 1610818662, 8640, 1607559849, 6706, 1608624428, 1607968838, 1608672132, 552, 1608537399, 1610381937, 1608607986, 381, 1608589576, 3920, 1608059746, 1609027319, 1607357022, 1607691623, 1609899907, 1608470962, 8791, 1610380419, 3064, 1609473849, 1607712704, 1609887249, 1609888813, 1608588103, 1244, 7279, 1607339125, 1607712682, 8472, 1269, 1607045076, 1607365865, 1846, 191, 1811, 1608702785, 1609049435, 5330, 1607257348, 1609890530, 3278, 1608586899, 1607939718, 2430, 1609893292, 60, 1607270186, 6336, 8650, 1608495626, 1608586953, 2121, 1607295286, 896, 1609889389, 1607560754, 6548, 6580, 1607440988, 1609111416, 1607807159, 8663, 1607129044, 6658, 1607799213, 3933, 1608596696, 1608663032, 1610791060, 1607135820, 1607413039, 1607659758, 1608487726, 4859, 1609142183, 1607276888, 7452, 1607368510, 1607266081, 2623, 1608416516, 2128, 3227, 5881, 6510, 1609166843, 7569, 1607712793, 1608850996, 3273, 1607939838, 9986, 3302, 1607206195, 1609082904, 1607510222, 7612, 1607022963, 1607051040, 1607719324, 1608849324, 1607642639, 1607104225, 705, 1608506424, 1608188073, 8018, 8085, 4755, 1611704179, 1607193886, 7007, 3041, 4001, 1552, 1716, 1608053349, 1608572299, 1608051417, 1607712784, 836, 1607929944, 1607795480, 1608200317, 415, 3662, 1610109929, 2496, 8550, 6868, 1608587385, 1608591490, 7370, 4549, 7505, 1879, 1876, 1608003341]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(34, 2271)\n",
      "(69, 2271)\n"
     ]
    }
   ],
   "source": [
    "# Train/test split: a row's partition is decided by looking its `id` up in the\n",
    "# predefined id lists; a row whose id is in neither list lands in neither frame.\n",
    "in_test = data['id'].isin(testids)\n",
    "in_train = data['id'].isin(trainids)\n",
    "\n",
    "testdata = data[in_test]\n",
    "traindata = data[in_train]\n",
    "\n",
    "print(testdata.shape)\n",
    "print(traindata.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(34, 2268)\n",
      "(69, 2268)\n"
     ]
    }
   ],
   "source": [
    "# Limit to features: remove the identifier column and the two label columns.\n",
    "# Dropping by name makes the excluded columns explicit and fails loudly\n",
    "# (KeyError) if one of them is missing, instead of relying on column positions.\n",
    "NON_FEATURE_COLUMNS = ['id', 'label', 'label_q9']\n",
    "\n",
    "testContent = testdata.drop(columns=NON_FEATURE_COLUMNS)\n",
    "print(testContent.shape)\n",
    "trainContent = traindata.drop(columns=NON_FEATURE_COLUMNS)\n",
    "print(trainContent.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(69, 2268)\n",
      "(34, 2268)\n"
     ]
    }
   ],
   "source": [
    "# Scale before feature selection/reduction.\n",
    "# The min-max range is learned from the training features only and then reused\n",
    "# for the test features, so scaled test values may fall outside [0, 1].\n",
    "min_max_scaler = preprocessing.MinMaxScaler().fit(trainContent)\n",
    "\n",
    "np_scaled = min_max_scaler.transform(trainContent)\n",
    "np_scaled2 = min_max_scaler.transform(testContent)\n",
    "\n",
    "# Wrapping the arrays in fresh DataFrames gives default integer row/column labels.\n",
    "featureSubset = pd.DataFrame(np_scaled)\n",
    "testSubset = pd.DataFrame(np_scaled2)\n",
    "\n",
    "for scaled_frame in (featureSubset, testSubset):\n",
    "    print(scaled_frame.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chi\n",
      "0\n",
      "          0\n",
      "3  0.495396\n",
      "4  0.251878\n",
      "5  0.262933\n",
      "6  0.054995\n",
      "7  0.253475\n",
      "          0\n",
      "0  0.553184\n",
      "1  0.417441\n",
      "2  0.709358\n",
      "3  0.471724\n",
      "4  0.795493\n",
      "Chi\n",
      "1\n",
      "          0         1\n",
      "3  0.007817  0.495396\n",
      "4  0.007891  0.251878\n",
      "5  0.005759  0.262933\n",
      "6  0.008526  0.054995\n",
      "7  0.196710  0.253475\n",
      "          0         1\n",
      "0  0.009871  0.553184\n",
      "1  0.005424  0.417441\n",
      "2  0.011169  0.709358\n",
      "3  0.005309  0.471724\n",
      "4  0.008286  0.795493\n",
      "Chi\n",
      "2\n",
      "          0         1         2\n",
      "3  0.007817  0.495396  0.008532\n",
      "4  0.007891  0.251878  0.006911\n",
      "5  0.005759  0.262933  0.005477\n",
      "6  0.008526  0.054995  0.000000\n",
      "7  0.196710  0.253475  0.036454\n",
      "          0         1         2\n",
      "0  0.009871  0.553184  0.005746\n",
      "1  0.005424  0.417441  0.003197\n",
      "2  0.011169  0.709358 -0.001693\n",
      "3  0.005309  0.471724 -0.004155\n",
      "4  0.008286  0.795493 -0.001837\n",
      "Chi\n",
      "3\n",
      "          0         1         2         3\n",
      "3  0.007817  0.495396  0.008549  0.008532\n",
      "4  0.007891  0.251878  0.006918  0.006911\n",
      "5  0.005759  0.262933  0.005483  0.005477\n",
      "6  0.008526  0.054995  0.000000  0.000000\n",
      "7  0.196710  0.253475  0.036453  0.036454\n",
      "          0         1         2         3\n",
      "0  0.009871  0.553184  0.005750  0.005746\n",
      "1  0.005424  0.417441  0.003198  0.003197\n",
      "2  0.011169  0.709358 -0.001666 -0.001693\n",
      "3  0.005309  0.471724 -0.004156 -0.004155\n",
      "4  0.008286  0.795493 -0.001837 -0.001837\n",
      "Chi\n",
      "4\n",
      "          0         1         2         3         4\n",
      "3  0.001781  0.007817  0.495396  0.008549  0.008532\n",
      "4  0.001930  0.007891  0.251878  0.006918  0.006911\n",
      "5  0.000077  0.005759  0.262933  0.005483  0.005477\n",
      "6  0.002206  0.008526  0.054995  0.000000  0.000000\n",
      "7  0.071105  0.196710  0.253475  0.036453  0.036454\n",
      "          0         1         2         3         4\n",
      "0  0.001755  0.009871  0.553184  0.005750  0.005746\n",
      "1  0.001601  0.005424  0.417441  0.003198  0.003197\n",
      "2  0.000723  0.011169  0.709358 -0.001666 -0.001693\n",
      "3  0.000055  0.005309  0.471724 -0.004156 -0.004155\n",
      "4  0.001247  0.008286  0.795493 -0.001837 -0.001837\n",
      "Chi\n",
      "5\n",
      "          0         1         2         3         4         5\n",
      "3  0.001781  0.007817  0.495396  0.008549  0.008532  0.005712\n",
      "4  0.001930  0.007891  0.251878  0.006918  0.006911  0.012789\n",
      "5  0.000077  0.005759  0.262933  0.005483  0.005477  0.350259\n",
      "6  0.002206  0.008526  0.054995  0.000000  0.000000  0.012890\n",
      "7  0.071105  0.196710  0.253475  0.036453  0.036454  0.253146\n",
      "          0         1         2         3         4         5\n",
      "0  0.001755  0.009871  0.553184  0.005750  0.005746  0.002284\n",
      "1  0.001601  0.005424  0.417441  0.003198  0.003197  0.805480\n",
      "2  0.000723  0.011169  0.709358 -0.001666 -0.001693  0.586219\n",
      "3  0.000055  0.005309  0.471724 -0.004156 -0.004155  0.016750\n",
      "4  0.001247  0.008286  0.795493 -0.001837 -0.001837  0.019135\n",
      "Chi\n",
      "6\n",
      "          0         1         2         3         4         5         6\n",
      "3  0.001781  0.007817  0.495396  0.058597  0.008549  0.008532  0.005712\n",
      "4  0.001930  0.007891  0.251878  0.010141  0.006918  0.006911  0.012789\n",
      "5  0.000077  0.005759  0.262933  0.308804  0.005483  0.005477  0.350259\n",
      "6  0.002206  0.008526  0.054995  0.442778  0.000000  0.000000  0.012890\n",
      "7  0.071105  0.196710  0.253475  0.397800  0.036453  0.036454  0.253146\n",
      "          0         1         2         3         4         5         6\n",
      "0  0.001755  0.009871  0.553184  0.165065  0.005750  0.005746  0.002284\n",
      "1  0.001601  0.005424  0.417441  0.464683  0.003198  0.003197  0.805480\n",
      "2  0.000723  0.011169  0.709358  0.981278 -0.001666 -0.001693  0.586219\n",
      "3  0.000055  0.005309  0.471724  0.859893 -0.004156 -0.004155  0.016750\n",
      "4  0.001247  0.008286  0.795493  0.293442 -0.001837 -0.001837  0.019135\n",
      "Chi\n",
      "7\n",
      "          0         1         2         3    4         5         6         7\n",
      "3  0.001781  0.007817  0.495396  0.058597  0.0  0.008549  0.008532  0.005712\n",
      "4  0.001930  0.007891  0.251878  0.010141  0.0  0.006918  0.006911  0.012789\n",
      "5  0.000077  0.005759  0.262933  0.308804  0.0  0.005483  0.005477  0.350259\n",
      "6  0.002206  0.008526  0.054995  0.442778  0.0  0.000000  0.000000  0.012890\n",
      "7  0.071105  0.196710  0.253475  0.397800  0.0  0.036453  0.036454  0.253146\n",
      "          0         1         2         3    4         5         6         7\n",
      "0  0.001755  0.009871  0.553184  0.165065  0.0  0.005750  0.005746  0.002284\n",
      "1  0.001601  0.005424  0.417441  0.464683  0.0  0.003198  0.003197  0.805480\n",
      "2  0.000723  0.011169  0.709358  0.981278  0.0 -0.001666 -0.001693  0.586219\n",
      "3  0.000055  0.005309  0.471724  0.859893  0.0 -0.004156 -0.004155  0.016750\n",
      "4  0.001247  0.008286  0.795493  0.293442  0.0 -0.001837 -0.001837  0.019135\n",
      "Chi\n",
      "8\n",
      "          0         1         2         3         4    5         6         7  \\\n",
      "3  0.000000  0.001781  0.007817  0.495396  0.058597  0.0  0.008549  0.008532   \n",
      "4  0.017544  0.001930  0.007891  0.251878  0.010141  0.0  0.006918  0.006911   \n",
      "5  0.032164  0.000077  0.005759  0.262933  0.308804  0.0  0.005483  0.005477   \n",
      "6  0.005848  0.002206  0.008526  0.054995  0.442778  0.0  0.000000  0.000000   \n",
      "7  0.040936  0.071105  0.196710  0.253475  0.397800  0.0  0.036453  0.036454   \n",
      "\n",
      "          8  \n",
      "3  0.005712  \n",
      "4  0.012789  \n",
      "5  0.350259  \n",
      "6  0.012890  \n",
      "7  0.253146  \n",
      "          0         1         2         3         4    5         6         7  \\\n",
      "0  0.011696  0.001755  0.009871  0.553184  0.165065  0.0  0.005750  0.005746   \n",
      "1  0.011696  0.001601  0.005424  0.417441  0.464683  0.0  0.003198  0.003197   \n",
      "2  0.035088  0.000723  0.011169  0.709358  0.981278  0.0 -0.001666 -0.001693   \n",
      "3  0.002924  0.000055  0.005309  0.471724  0.859893  0.0 -0.004156 -0.004155   \n",
      "4  0.482456  0.001247  0.008286  0.795493  0.293442  0.0 -0.001837 -0.001837   \n",
      "\n",
      "          8  \n",
      "0  0.002284  \n",
      "1  0.805480  \n",
      "2  0.586219  \n",
      "3  0.016750  \n",
      "4  0.019135  \n",
      "Chi\n",
      "9\n",
      "              0         1         2         3         4         5    6  \\\n",
      "3  1.244155e-03  0.000000  0.001781  0.007817  0.495396  0.058597  0.0   \n",
      "4  5.044764e-04  0.017544  0.001930  0.007891  0.251878  0.010141  0.0   \n",
      "5  2.647664e-04  0.032164  0.000077  0.005759  0.262933  0.308804  0.0   \n",
      "6  2.020020e-06  0.005848  0.002206  0.008526  0.054995  0.442778  0.0   \n",
      "7  4.681953e-08  0.040936  0.071105  0.196710  0.253475  0.397800  0.0   \n",
      "\n",
      "          7         8         9  \n",
      "3  0.008549  0.008532  0.005712  \n",
      "4  0.006918  0.006911  0.012789  \n",
      "5  0.005483  0.005477  0.350259  \n",
      "6  0.000000  0.000000  0.012890  \n",
      "7  0.036453  0.036454  0.253146  \n",
      "              0         1         2         3         4         5    6  \\\n",
      "0  6.470236e-06  0.011696  0.001755  0.009871  0.553184  0.165065  0.0   \n",
      "1  3.394523e-08  0.011696  0.001601  0.005424  0.417441  0.464683  0.0   \n",
      "2  8.959268e-06  0.035088  0.000723  0.011169  0.709358  0.981278  0.0   \n",
      "3  7.712134e-07  0.002924  0.000055  0.005309  0.471724  0.859893  0.0   \n",
      "4  1.646928e-05  0.482456  0.001247  0.008286  0.795493  0.293442  0.0   \n",
      "\n",
      "          7         8         9  \n",
      "0  0.005750  0.005746  0.002284  \n",
      "1  0.003198  0.003197  0.805480  \n",
      "2 -0.001666 -0.001693  0.586219  \n",
      "3 -0.004156 -0.004155  0.016750  \n",
      "4 -0.001837 -0.001837  0.019135  \n",
      "PCA\n",
      "0\n",
      "          0\n",
      "3  6.931593\n",
      "4  6.138009\n",
      "5  7.117302\n",
      "6  7.061541\n",
      "7  6.957280\n",
      "          0\n",
      "0  5.598703\n",
      "1  5.634713\n",
      "2  6.336897\n",
      "3  7.355297\n",
      "4  7.612249\n",
      "PCA\n",
      "1\n",
      "          0         1\n",
      "3  6.931593 -0.287883\n",
      "4  6.138009  1.226432\n",
      "5  7.117302  0.451058\n",
      "6  7.061542  0.023720\n",
      "7  6.957279  0.879243\n",
      "          0         1\n",
      "0  5.598703  0.836926\n",
      "1  5.634713 -1.557737\n",
      "2  6.336897 -0.560595\n",
      "3  7.355297 -0.267990\n",
      "4  7.612250  1.872548\n",
      "PCA\n",
      "2\n",
      "          0         1         2\n",
      "3  6.931594 -0.287908 -0.898711\n",
      "4  6.138009  1.226440  1.078790\n",
      "5  7.117302  0.451080 -0.485690\n",
      "6  7.061541  0.023700 -0.263965\n",
      "7  6.957280  0.879248  1.009040\n",
      "          0         1         2\n",
      "0  5.598703  0.836909  0.033951\n",
      "1  5.634713 -1.557752  0.116365\n",
      "2  6.336897 -0.560613 -1.773788\n",
      "3  7.355296 -0.268006 -2.090126\n",
      "4  7.612250  1.872542 -0.303492\n",
      "PCA\n",
      "3\n",
      "          0         1         2         3\n",
      "3  6.931593 -0.287907 -0.898750 -1.905636\n",
      "4  6.138009  1.226441  1.078752 -1.316587\n",
      "5  7.117303  0.451078 -0.485834 -2.238383\n",
      "6  7.061541  0.023700 -0.263966 -0.664050\n",
      "7  6.957280  0.879254  1.009019 -0.217691\n",
      "          0         1         2         3\n",
      "0  5.598703  0.836908  0.033984 -0.650146\n",
      "1  5.634713 -1.557749  0.116388 -2.451478\n",
      "2  6.336897 -0.560616 -1.773739  0.043659\n",
      "3  7.355296 -0.268005 -2.090104  0.718201\n",
      "4  7.612249  1.872539 -0.303557  2.393069\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PCA\n",
      "4\n",
      "          0         1         2         3         4\n",
      "3  6.931593 -0.287909 -0.898771 -1.905514 -2.473989\n",
      "4  6.138009  1.226440  1.078757 -1.316566 -1.187126\n",
      "5  7.117303  0.451084 -0.485797 -2.238323 -1.989322\n",
      "6  7.061541  0.023701 -0.263952 -0.664166 -0.837286\n",
      "7  6.957280  0.879252  1.009004 -0.217648 -1.753163\n",
      "          0         1         2         3         4\n",
      "0  5.598703  0.836906  0.033976 -0.650111 -0.897781\n",
      "1  5.634713 -1.557752  0.116379 -2.451498 -0.045335\n",
      "2  6.336897 -0.560618 -1.773747  0.043574 -1.303304\n",
      "3  7.355296 -0.268007 -2.090106  0.718117 -1.477337\n",
      "4  7.612249  1.872542 -0.303556  2.393115 -1.047652\n",
      "PCA\n",
      "5\n",
      "          0         1         2         3         4         5\n",
      "3  6.931593 -0.287911 -0.898764 -1.905520 -2.473255  0.995036\n",
      "4  6.138009  1.226438  1.078762 -1.316547 -1.186284  0.560277\n",
      "5  7.117302  0.451073 -0.485807 -2.238220 -1.992466  2.058900\n",
      "6  7.061541  0.023701 -0.263947 -0.664158 -0.836749  0.710513\n",
      "7  6.957280  0.879249  1.008996 -0.217628 -1.753232 -1.332136\n",
      "          0         1         2         3         4         5\n",
      "0  5.598703  0.836913  0.033979 -0.650163 -0.897350 -0.736805\n",
      "1  5.634713 -1.557751  0.116380 -2.451506 -0.044601 -0.200173\n",
      "2  6.336897 -0.560609 -1.773740  0.043516 -1.302133  0.001801\n",
      "3  7.355296 -0.268001 -2.090101  0.718087 -1.476987  0.435412\n",
      "4  7.612249  1.872543 -0.303558  2.393107 -1.047284  1.055771\n",
      "PCA\n",
      "6\n",
      "          0         1         2         3         4         5         6\n",
      "3  6.931570 -0.288573 -0.898814 -1.905621 -2.478562  1.011123  0.850635\n",
      "4  6.138006  1.226466  1.078815 -1.316421 -1.183510  0.529812 -1.228618\n",
      "5  7.117320  0.449490 -0.486118 -2.239250 -2.002089  2.124158 -1.273605\n",
      "6  7.061562  0.023544 -0.263913 -0.664296 -0.842179  0.710497 -0.469650\n",
      "7  6.957296  0.879108  1.008889 -0.218492 -1.759204 -1.329388 -0.511918\n",
      "          0         1         2         3         4         5         6\n",
      "0  5.598691  0.837219  0.034270 -0.649774 -0.892662 -0.741310  2.249702\n",
      "1  5.634711 -1.557782  0.116363 -2.452103 -0.047461 -0.209705  1.516783\n",
      "2  6.336908 -0.560275 -1.773505  0.043991 -1.297280  0.005283  2.197167\n",
      "3  7.355305 -0.268142 -2.090068  0.717592 -1.479451  0.445417  1.361822\n",
      "4  7.612265  1.871917 -0.303586  2.392613 -1.055196  1.093969  0.070707\n",
      "PCA\n",
      "7\n",
      "          0         1         2         3         4         5         6  \\\n",
      "3  6.931553 -0.287774 -0.898557 -1.905828 -2.469427  1.009078  0.948063   \n",
      "4  6.137985  1.226464  1.078678 -1.316214 -1.183008  0.565293 -1.311259   \n",
      "5  7.117362  0.451112 -0.485748 -2.238798 -1.997099  2.051971 -1.262069   \n",
      "6  7.061470  0.023615 -0.263907 -0.663052 -0.833419  0.725084 -0.447522   \n",
      "7  6.957319  0.879267  1.008799 -0.217946 -1.755070 -1.343778 -0.577276   \n",
      "\n",
      "          7  \n",
      "3  0.403838  \n",
      "4 -1.414385  \n",
      "5  0.029070  \n",
      "6  0.257967  \n",
      "7 -1.153752  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598726  0.836963  0.033904 -0.650279 -0.899006 -0.743551  2.157959   \n",
      "1  5.634745 -1.557775  0.116335 -2.452022 -0.047613 -0.202907  1.507654   \n",
      "2  6.336875 -0.560649 -1.773503  0.043836 -1.301121  0.012940  2.146766   \n",
      "3  7.355300 -0.267968 -2.089812  0.718024 -1.478813  0.440538  1.431183   \n",
      "4  7.612240  1.872588 -0.303534  2.394016 -1.047004  1.059662  0.075835   \n",
      "\n",
      "          7  \n",
      "0 -0.750801  \n",
      "1 -0.047345  \n",
      "2  0.113576  \n",
      "3  1.037162  \n",
      "4  0.186380  \n",
      "PCA\n",
      "8\n",
      "          0         1         2         3         4         5         6  \\\n",
      "3  6.931587 -0.288147 -0.898921 -1.905736 -2.465925  0.984942  1.012549   \n",
      "4  6.138013  1.226320  1.078948 -1.315798 -1.184779  0.550571 -1.301027   \n",
      "5  7.117308  0.451555 -0.485508 -2.242304 -2.012013  2.045241 -1.285641   \n",
      "6  7.061550  0.023731 -0.263595 -0.663174 -0.839216  0.681548 -0.472661   \n",
      "7  6.957270  0.879256  1.008706 -0.215751 -1.745292 -1.310415 -0.594256   \n",
      "\n",
      "          7         8  \n",
      "3  0.397572 -0.680033  \n",
      "4 -1.435605  0.300099  \n",
      "5  0.053054  0.507698  \n",
      "6  0.233622 -0.930218  \n",
      "7 -1.159036  0.049700  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598695  0.836847  0.033955 -0.649464 -0.896194 -0.751068  2.180627   \n",
      "1  5.634709 -1.557711  0.116322 -2.451506 -0.044058 -0.201639  1.509605   \n",
      "2  6.336899 -0.560560 -1.773602  0.044157 -1.304849 -0.017846  2.160092   \n",
      "3  7.355297 -0.267871 -2.089981  0.716984 -1.479379  0.409451  1.442061   \n",
      "4  7.612257  1.872609 -0.303363  2.393529 -1.051270  1.056442  0.079258   \n",
      "\n",
      "          7         8  \n",
      "0 -0.729925  0.170854  \n",
      "1 -0.048622 -0.187732  \n",
      "2  0.109992  0.108983  \n",
      "3  1.032021 -0.291286  \n",
      "4  0.177035 -0.283964  \n",
      "PCA\n",
      "9\n",
      "          0         1         2         3         4         5         6  \\\n",
      "3  6.931574 -0.288274 -0.897973 -1.906466 -2.468264  0.983723  0.983260   \n",
      "4  6.137982  1.226322  1.078853 -1.317270 -1.185692  0.552923 -1.336635   \n",
      "5  7.117295  0.452124 -0.484059 -2.237837 -1.992172  2.118651 -1.285998   \n",
      "6  7.061532  0.023725 -0.263878 -0.663465 -0.835281  0.714314 -0.462815   \n",
      "7  6.957331  0.878800  1.008554 -0.217260 -1.753119 -1.350734 -0.572149   \n",
      "\n",
      "          7         8         9  \n",
      "3  0.352102 -0.917780  0.277030  \n",
      "4 -1.389221  0.122374  0.535345  \n",
      "5  0.272099  0.187581  1.602965  \n",
      "6  0.251684 -0.908533  1.049353  \n",
      "7 -1.280557  0.123320 -0.617168  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598683  0.837110  0.034152 -0.650325 -0.895764 -0.729722  2.143938   \n",
      "1  5.634723 -1.558113  0.116077 -2.452177 -0.044895 -0.219613  1.533249   \n",
      "2  6.336897 -0.560510 -1.773849  0.044285 -1.301193  0.012512  2.162083   \n",
      "3  7.355293 -0.267943 -2.089676  0.718961 -1.474730  0.446909  1.450609   \n",
      "4  7.612265  1.872453 -0.304100  2.393450 -1.051073  1.059859  0.095977   \n",
      "\n",
      "          7         8         9  \n",
      "0 -0.724000  0.181370  0.280377  \n",
      "1 -0.150591  0.075854  0.685101  \n",
      "2  0.094263  0.256254  0.183487  \n",
      "3  1.020248 -0.118771 -0.094572  \n",
      "4  0.186497 -0.401273  0.747591  \n",
      "Chi\n",
      "0\n",
      "          0\n",
      "0  0.005911\n",
      "1  0.000223\n",
      "2  0.001207\n",
      "3  0.006602\n",
      "4  0.006373\n",
      "          0\n",
      "0  0.000218\n",
      "1  0.000057\n",
      "2  0.000967\n",
      "3  0.000050\n",
      "4  0.000038\n",
      "Chi\n",
      "1\n",
      "          0         1\n",
      "0  0.005911  0.006136\n",
      "1  0.000223  0.000205\n",
      "2  0.001207  0.001068\n",
      "3  0.006602  0.006010\n",
      "4  0.006373  0.005681\n",
      "          0         1\n",
      "0  0.000218  0.000196\n",
      "1  0.000057  0.000058\n",
      "2  0.000967  0.000847\n",
      "3  0.000050  0.000055\n",
      "4  0.000038  0.000031\n",
      "Chi\n",
      "2\n",
      "          0         1         2\n",
      "0  0.006179  0.005911  0.006136\n",
      "1  0.000208  0.000223  0.000205\n",
      "2  0.001071  0.001207  0.001068\n",
      "3  0.006269  0.006602  0.006010\n",
      "4  0.005690  0.006373  0.005681\n",
      "          0         1         2\n",
      "0  0.000198  0.000218  0.000196\n",
      "1  0.000058  0.000057  0.000058\n",
      "2  0.000856  0.000967  0.000847\n",
      "3  0.000066  0.000050  0.000055\n",
      "4  0.000031  0.000038  0.000031\n",
      "Chi\n",
      "3\n",
      "          0         1         2         3\n",
      "0  0.006179  0.005911  0.006136  0.005651\n",
      "1  0.000208  0.000223  0.000205  0.000176\n",
      "2  0.001071  0.001207  0.001068  0.000170\n",
      "3  0.006269  0.006602  0.006010  0.003281\n",
      "4  0.005690  0.006373  0.005681  0.001081\n",
      "          0         1         2         3\n",
      "0  0.000198  0.000218  0.000196  0.000078\n",
      "1  0.000058  0.000057  0.000058  0.000121\n",
      "2  0.000856  0.000967  0.000847  0.000176\n",
      "3  0.000066  0.000050  0.000055  0.000069\n",
      "4  0.000031  0.000038  0.000031 -0.000004\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chi\n",
      "4\n",
      "          0         1         2         3         4\n",
      "0  0.006179  0.005911  0.006136  0.005651  0.005654\n",
      "1  0.000208  0.000223  0.000205  0.000176  0.000177\n",
      "2  0.001071  0.001207  0.001068  0.000170  0.000171\n",
      "3  0.006269  0.006602  0.006010  0.003281  0.003281\n",
      "4  0.005690  0.006373  0.005681  0.001081  0.001081\n",
      "          0         1         2         3         4\n",
      "0  0.000198  0.000218  0.000196  0.000078  0.000079\n",
      "1  0.000058  0.000057  0.000058  0.000121  0.000121\n",
      "2  0.000856  0.000967  0.000847  0.000176  0.000174\n",
      "3  0.000066  0.000050  0.000055  0.000069  0.000068\n",
      "4  0.000031  0.000038  0.000031 -0.000004 -0.000004\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n",
      "C:\\Users\\mltla\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:133: RuntimeWarning: invalid value encountered in double_scalars\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chi\n",
      "5\n",
      "          0         1         2         3         4         5\n",
      "0  0.006179  0.007431  0.005911  0.006136  0.005651  0.005654\n",
      "1  0.000208  0.000100  0.000223  0.000205  0.000176  0.000177\n",
      "2  0.001071  0.000267  0.001207  0.001068  0.000170  0.000171\n",
      "3  0.006269  0.002603  0.006602  0.006010  0.003281  0.003281\n",
      "4  0.005690  0.001708  0.006373  0.005681  0.001081  0.001081\n",
      "          0         1         2         3         4         5\n",
      "0  0.000198  0.000070  0.000218  0.000196  0.000078  0.000079\n",
      "1  0.000058  0.000062  0.000057  0.000058  0.000121  0.000121\n",
      "2  0.000856  0.000155  0.000967  0.000847  0.000176  0.000174\n",
      "3  0.000066  0.000086  0.000050  0.000055  0.000069  0.000068\n",
      "4  0.000031 -0.000005  0.000038  0.000031 -0.000004 -0.000004\n",
      "Chi\n",
      "6\n",
      "          0         1         2         3         4         5         6\n",
      "0  0.007718  0.006179  0.007431  0.005911  0.006136  0.005651  0.005654\n",
      "1  0.000126  0.000208  0.000100  0.000223  0.000205  0.000176  0.000177\n",
      "2  0.000291  0.001071  0.000267  0.001207  0.001068  0.000170  0.000171\n",
      "3  0.004355  0.006269  0.002603  0.006602  0.006010  0.003281  0.003281\n",
      "4  0.001770  0.005690  0.001708  0.006373  0.005681  0.001081  0.001081\n",
      "          0         1         2         3         4         5         6\n",
      "0  0.000087  0.000198  0.000070  0.000218  0.000196  0.000078  0.000079\n",
      "1  0.000063  0.000058  0.000062  0.000057  0.000058  0.000121  0.000121\n",
      "2  0.000215  0.000856  0.000155  0.000967  0.000847  0.000176  0.000174\n",
      "3  0.000157  0.000066  0.000086  0.000050  0.000055  0.000069  0.000068\n",
      "4 -0.000005  0.000031 -0.000005  0.000038  0.000031 -0.000004 -0.000004\n",
      "Chi\n",
      "7\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  0.007718  0.006179  0.007431  0.005911  0.006136  0.000340  0.005651   \n",
      "1  0.000126  0.000208  0.000100  0.000223  0.000205  0.000001  0.000176   \n",
      "2  0.000291  0.001071  0.000267  0.001207  0.001068  0.000008  0.000170   \n",
      "3  0.004355  0.006269  0.002603  0.006602  0.006010  0.000074  0.003281   \n",
      "4  0.001770  0.005690  0.001708  0.006373  0.005681  0.001031  0.001081   \n",
      "\n",
      "          7  \n",
      "0  0.005654  \n",
      "1  0.000177  \n",
      "2  0.000171  \n",
      "3  0.003281  \n",
      "4  0.001081  \n",
      "          0         1         2         3         4             5         6  \\\n",
      "0  0.000087  0.000198  0.000070  0.000218  0.000196  7.799512e-07  0.000078   \n",
      "1  0.000063  0.000058  0.000062  0.000057  0.000058 -9.187907e-09  0.000121   \n",
      "2  0.000215  0.000856  0.000155  0.000967  0.000847  4.374465e-07  0.000176   \n",
      "3  0.000157  0.000066  0.000086  0.000050  0.000055 -2.388856e-08  0.000069   \n",
      "4 -0.000005  0.000031 -0.000005  0.000038  0.000031  6.823552e-06 -0.000004   \n",
      "\n",
      "          7  \n",
      "0  0.000079  \n",
      "1  0.000121  \n",
      "2  0.000174  \n",
      "3  0.000068  \n",
      "4 -0.000004  \n",
      "Chi\n",
      "8\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  0.007718  0.006179  0.007431  0.005911  0.006136  0.000340  0.005651   \n",
      "1  0.000126  0.000208  0.000100  0.000223  0.000205  0.000001  0.000176   \n",
      "2  0.000291  0.001071  0.000267  0.001207  0.001068  0.000008  0.000170   \n",
      "3  0.004355  0.006269  0.002603  0.006602  0.006010  0.000074  0.003281   \n",
      "4  0.001770  0.005690  0.001708  0.006373  0.005681  0.001031  0.001081   \n",
      "\n",
      "          7         8  \n",
      "0  0.005654  0.006241  \n",
      "1  0.000177  0.000158  \n",
      "2  0.000171  0.000288  \n",
      "3  0.003281  0.003537  \n",
      "4  0.001081  0.003565  \n",
      "          0         1         2         3         4             5         6  \\\n",
      "0  0.000087  0.000198  0.000070  0.000218  0.000196  7.799512e-07  0.000078   \n",
      "1  0.000063  0.000058  0.000062  0.000057  0.000058 -9.187907e-09  0.000121   \n",
      "2  0.000215  0.000856  0.000155  0.000967  0.000847  4.374465e-07  0.000176   \n",
      "3  0.000157  0.000066  0.000086  0.000050  0.000055 -2.388856e-08  0.000069   \n",
      "4 -0.000005  0.000031 -0.000005  0.000038  0.000031  6.823552e-06 -0.000004   \n",
      "\n",
      "          7         8  \n",
      "0  0.000079  0.000115  \n",
      "1  0.000121  0.000085  \n",
      "2  0.000174  0.000202  \n",
      "3  0.000068  0.000066  \n",
      "4 -0.000004 -0.000003  \n",
      "Chi\n",
      "9\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  0.007718  0.006179  0.007431  0.005911  0.006136  0.000340  0.005651   \n",
      "1  0.000126  0.000208  0.000100  0.000223  0.000205  0.000001  0.000176   \n",
      "2  0.000291  0.001071  0.000267  0.001207  0.001068  0.000008  0.000170   \n",
      "3  0.004355  0.006269  0.002603  0.006602  0.006010  0.000074  0.003281   \n",
      "4  0.001770  0.005690  0.001708  0.006373  0.005681  0.001031  0.001081   \n",
      "\n",
      "          7         8         9  \n",
      "0  0.006619  0.005654  0.006241  \n",
      "1  0.000146  0.000177  0.000158  \n",
      "2  0.000363  0.000171  0.000288  \n",
      "3  0.003701  0.003281  0.003537  \n",
      "4  0.005161  0.001081  0.003565  \n",
      "          0         1         2         3         4             5         6  \\\n",
      "0  0.000087  0.000198  0.000070  0.000218  0.000196  7.799512e-07  0.000078   \n",
      "1  0.000063  0.000058  0.000062  0.000057  0.000058 -9.187907e-09  0.000121   \n",
      "2  0.000215  0.000856  0.000155  0.000967  0.000847  4.374465e-07  0.000176   \n",
      "3  0.000157  0.000066  0.000086  0.000050  0.000055 -2.388856e-08  0.000069   \n",
      "4 -0.000005  0.000031 -0.000005  0.000038  0.000031  6.823552e-06 -0.000004   \n",
      "\n",
      "          7         8         9  \n",
      "0  0.000139  0.000079  0.000115  \n",
      "1  0.000062  0.000121  0.000085  \n",
      "2  0.000220  0.000174  0.000202  \n",
      "3  0.000065  0.000068  0.000066  \n",
      "4 -0.000002 -0.000004 -0.000003  \n",
      "PCA\n",
      "0\n",
      "          0\n",
      "0  3.732135\n",
      "1  5.343661\n",
      "2  6.193566\n",
      "3  6.931593\n",
      "4  6.138009\n",
      "          0\n",
      "0  5.598703\n",
      "1  5.634713\n",
      "2  6.336897\n",
      "3  7.355297\n",
      "4  7.612249\n",
      "PCA\n",
      "1\n",
      "          0         1\n",
      "0  3.732135 -0.881406\n",
      "1  5.343660 -0.303583\n",
      "2  6.193565 -1.049921\n",
      "3  6.931594 -0.287910\n",
      "4  6.138009  1.226438\n",
      "          0         1\n",
      "0  5.598703  0.836908\n",
      "1  5.634712 -1.557754\n",
      "2  6.336897 -0.560614\n",
      "3  7.355296 -0.268001\n",
      "4  7.612250  1.872543\n",
      "PCA\n",
      "2\n",
      "          0         1         2\n",
      "0  3.732135 -0.881428 -1.518709\n",
      "1  5.343661 -0.303588 -0.918677\n",
      "2  6.193566 -1.049941 -1.000707\n",
      "3  6.931593 -0.287904 -0.898759\n",
      "4  6.138009  1.226452  1.078723\n",
      "          0         1         2\n",
      "0  5.598703  0.836898  0.033983\n",
      "1  5.634712 -1.557763  0.116390\n",
      "2  6.336897 -0.560628 -1.773720\n",
      "3  7.355296 -0.268015 -2.090094\n",
      "4  7.612250  1.872559 -0.303591\n",
      "PCA\n",
      "3\n",
      "          0         1         2         3\n",
      "0  3.732135 -0.881401 -1.518760 -3.534311\n",
      "1  5.343661 -0.303574 -0.918694 -0.936988\n",
      "2  6.193566 -1.049921 -1.000734 -0.563838\n",
      "3  6.931593 -0.287907 -0.898770 -1.905539\n",
      "4  6.138009  1.226436  1.078761 -1.316587\n",
      "          0         1         2         3\n",
      "0  5.598703  0.836909  0.033977 -0.650111\n",
      "1  5.634712 -1.557748  0.116383 -2.451510\n",
      "2  6.336897 -0.560611 -1.773745  0.043611\n",
      "3  7.355296 -0.268003 -2.090102  0.718141\n",
      "4  7.612249  1.872541 -0.303563  2.393117\n",
      "PCA\n",
      "4\n",
      "          0         1         2         3         4\n",
      "0  3.732135 -0.881404 -1.518754 -3.534339 -0.508925\n",
      "1  5.343661 -0.303582 -0.918692 -0.937128 -1.042065\n",
      "2  6.193566 -1.049926 -1.000730 -0.563914 -0.351600\n",
      "3  6.931593 -0.287901 -0.898768 -1.905559 -2.472119\n",
      "4  6.138009  1.226438  1.078767 -1.316553 -1.186697\n",
      "          0         1         2         3         4\n",
      "0  5.598703  0.836909  0.033972 -0.650130 -0.897475\n",
      "1  5.634713 -1.557749  0.116381 -2.451554 -0.043760\n",
      "2  6.336897 -0.560616 -1.773744  0.043541 -1.302582\n",
      "3  7.355297 -0.268005 -2.090109  0.718145 -1.477404\n",
      "4  7.612249  1.872542 -0.303556  2.393102 -1.047476\n",
      "PCA\n",
      "5\n",
      "          0         1         2         3         4         5\n",
      "0  3.732135 -0.881402 -1.518745 -3.534381 -0.508605 -1.190338\n",
      "1  5.343661 -0.303582 -0.918710 -0.937004 -1.042515  1.125769\n",
      "2  6.193566 -1.049928 -1.000755 -0.563822 -0.352447 -0.142557\n",
      "3  6.931593 -0.287897 -0.898754 -1.905572 -2.472161  1.004880\n",
      "4  6.138009  1.226445  1.078765 -1.316567 -1.186139  0.561275\n",
      "          0         1         2         3         4         5\n",
      "0  5.598703  0.836908  0.033973 -0.650144 -0.897779 -0.736849\n",
      "1  5.634712 -1.557751  0.116378 -2.451536 -0.044483 -0.200229\n",
      "2  6.336897 -0.560619 -1.773756  0.043598 -1.302819 -0.000950\n",
      "3  7.355296 -0.268008 -2.090114  0.718126 -1.477440  0.433654\n",
      "4  7.612249  1.872544 -0.303550  2.393117 -1.047311  1.058601\n",
      "PCA\n",
      "6\n",
      "          0         1         2         3         4         5         6\n",
      "0  3.732150 -0.881526 -1.518844 -3.535655 -0.506445 -1.171073  1.694305\n",
      "1  5.343676 -0.303386 -0.919463 -0.931914 -1.048831  1.017892  1.939277\n",
      "2  6.193558 -1.049916 -1.000766 -0.562995 -0.355208 -0.167726  2.804673\n",
      "3  6.931562 -0.287429 -0.898279 -1.906278 -2.467515  1.024532  0.985934\n",
      "4  6.137988  1.226613  1.078626 -1.315678 -1.188067  0.534307 -1.321397\n",
      "          0         1         2         3         4         5         6\n",
      "0  5.598710  0.836954  0.033769 -0.649343 -0.897913 -0.748526  2.172053\n",
      "1  5.634739 -1.557830  0.115509 -2.451132 -0.045190 -0.212330  1.509658\n",
      "2  6.336912 -0.560630 -1.774233  0.045584 -1.305465 -0.031041  2.168963\n",
      "3  7.355339 -0.268072 -2.090833  0.719472 -1.477784  0.435985  1.444734\n",
      "4  7.612215  1.872558 -0.302801  2.393295 -1.049115  1.062604  0.099668\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PCA\n",
      "7\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  3.732134 -0.881929 -1.518194 -3.535937 -0.512758 -1.184333  1.680644   \n",
      "1  5.343622 -0.304258 -0.918694 -0.937523 -1.041687  1.137931  1.933421   \n",
      "2  6.193575 -1.050230 -1.000483 -0.564902 -0.355452 -0.120534  2.782551   \n",
      "3  6.931599 -0.287945 -0.898660 -1.905586 -2.471584  1.002294  0.964448   \n",
      "4  6.137988  1.226452  1.078572 -1.317252 -1.185010  0.554438 -1.291416   \n",
      "\n",
      "          7  \n",
      "0 -0.767364  \n",
      "1 -0.098013  \n",
      "2 -0.168044  \n",
      "3  0.374994  \n",
      "4 -1.445592  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598699  0.836622  0.034238 -0.649815 -0.901211 -0.743671  2.156570   \n",
      "1  5.634721 -1.557739  0.116515 -2.450954 -0.044400 -0.199143  1.486017   \n",
      "2  6.336878 -0.561150 -1.773592  0.042599 -1.304167  0.007050  2.174340   \n",
      "3  7.355291 -0.268395 -2.089704  0.717450 -1.481340  0.419130  1.459335   \n",
      "4  7.612239  1.872299 -0.303510  2.393055 -1.048880  1.053691  0.104065   \n",
      "\n",
      "          7  \n",
      "0 -0.720854  \n",
      "1 -0.046468  \n",
      "2  0.128341  \n",
      "3  1.017727  \n",
      "4  0.183468  \n",
      "PCA\n",
      "8\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  3.732158 -0.881302 -1.518626 -3.534351 -0.502957 -1.199962  1.698287   \n",
      "1  5.343680 -0.303481 -0.918662 -0.937340 -1.037285  1.132192  1.921072   \n",
      "2  6.193566 -1.049857 -1.000654 -0.564701 -0.351731 -0.135124  2.779025   \n",
      "3  6.931628 -0.287931 -0.898443 -1.906659 -2.463797  0.980439  0.997727   \n",
      "4  6.137983  1.226346  1.078378 -1.316645 -1.188765  0.558156 -1.329011   \n",
      "\n",
      "          7         8  \n",
      "0 -0.879952  0.542588  \n",
      "1 -0.147640  1.210309  \n",
      "2 -0.177095 -0.338767  \n",
      "3  0.412621 -0.602039  \n",
      "4 -1.424378  0.216101  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598727  0.837014  0.034244 -0.650719 -0.892277 -0.749363  2.171051   \n",
      "1  5.634724 -1.557678  0.116310 -2.451556 -0.042078 -0.195359  1.486333   \n",
      "2  6.336876 -0.560630 -1.773900  0.043231 -1.302952  0.002381  2.169056   \n",
      "3  7.355280 -0.268074 -2.090116  0.717408 -1.476640  0.427783  1.458719   \n",
      "4  7.612228  1.872497 -0.303460  2.392603 -1.048080  1.046221  0.104068   \n",
      "\n",
      "          7         8  \n",
      "0 -0.723103  0.307801  \n",
      "1 -0.052856 -0.116899  \n",
      "2  0.128781  0.152913  \n",
      "3  1.049526 -0.150965  \n",
      "4  0.216414 -0.329853  \n",
      "PCA\n",
      "9\n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  3.732146 -0.881433 -1.518796 -3.534653 -0.509491 -1.186836  1.690871   \n",
      "1  5.343647 -0.303665 -0.918479 -0.936422 -1.042188  1.126770  1.959410   \n",
      "2  6.193551 -1.049835 -1.000487 -0.564439 -0.350064 -0.131037  2.819387   \n",
      "3  6.931591 -0.287928 -0.898770 -1.905232 -2.472431  0.996085  0.972347   \n",
      "4  6.138016  1.226539  1.078308 -1.316830 -1.188211  0.555908 -1.345150   \n",
      "\n",
      "          7         8         9  \n",
      "0 -0.879842 -0.506213  0.089076  \n",
      "1 -0.185269 -1.231828  0.137918  \n",
      "2 -0.285453  0.356825 -0.094160  \n",
      "3  0.388265  0.721965 -1.278819  \n",
      "4 -1.397122 -0.276118 -0.465378  \n",
      "          0         1         2         3         4         5         6  \\\n",
      "0  5.598699  0.836838  0.033785 -0.649563 -0.898498 -0.737575  2.151999   \n",
      "1  5.634716 -1.557731  0.116604 -2.451630 -0.042320 -0.188921  1.508586   \n",
      "2  6.336886 -0.560654 -1.773389  0.043940 -1.301040  0.002143  2.197372   \n",
      "3  7.355295 -0.267947 -2.089898  0.717971 -1.475042  0.435695  1.469367   \n",
      "4  7.612237  1.872512 -0.303764  2.393789 -1.049114  1.039787  0.099213   \n",
      "\n",
      "          7         8         9  \n",
      "0 -0.743130 -0.267356 -0.023617  \n",
      "1 -0.101789  0.065456  0.542861  \n",
      "2  0.068022 -0.151012  0.169805  \n",
      "3  0.972652  0.192006  0.071680  \n",
      "4  0.227883  0.405669 -1.491627  \n"
     ]
    }
   ],
   "source": [
    "labels = [\"label\", \"label_q9\"]\n",
    "featureEs = [\"Chi\", \"PCA\"]\n",
    "modelTypelist = [\"XG\"]#[\"NB\", \"LR\", \"SVC1\", \"SVC2\", \"kNN3\", \"kNN5\", \"RF\"]\n",
    "\n",
    "for label in labels:\n",
    "    \n",
    "    target = list(traindata[label])\n",
    "    \n",
    "    for featureE in featureEs:\n",
    "    \n",
    "        featureDF = []\n",
    "        testDFs = []\n",
    "\n",
    "        if featureE == \"Chi\":\n",
    "            nFeatureList = list(np.arange(1,11,1))\n",
    "\n",
    "            for numberOfFeatures in nFeatureList:\n",
    "                chisetup = SelectKBest(chi2, k=numberOfFeatures)\n",
    "                chisetup = chisetup.fit(featureSubset, target)\n",
    "                featureSubset2 = chisetup.transform(featureSubset)\n",
    "                featureSubset2=pd.DataFrame(featureSubset2).assign(target = target)\n",
    "                featureDF.append(featureSubset2)\n",
    "                testSubset2 = chisetup.transform(testSubset)\n",
    "                testDFs.append(pd.DataFrame(testSubset2))\n",
    "\n",
    "        elif featureE == \"PCA\":\n",
    "            nFeatureList = list(np.arange(1,11,1))\n",
    "            for numberOfFeatures in nFeatureList:\n",
    "                pca = PCA(n_components=numberOfFeatures)\n",
    "                pca = pca.fit(featureSubset)\n",
    "                X_pca = pca.transform(featureSubset)\n",
    "                pcaDF = pd.DataFrame(X_pca)\n",
    "                pcaDF = pcaDF.assign(target = target)\n",
    "                featureDF.append(pcaDF)\n",
    "                testSubset2 = pca.transform(testSubset)\n",
    "                testDFs.append(pd.DataFrame(testSubset2))\n",
    "                \n",
    "                \n",
    "        #create lists to populate\n",
    "        flist = [] \n",
    "        mlist = []\n",
    "        llist = []\n",
    "        featureList = []\n",
    "        f1List = []\n",
    "        accuracyList = []\n",
    "        truePosList = []\n",
    "        trueNegList = []\n",
    "        falsePosList = []\n",
    "        falseNegList = []\n",
    "        predictions = []\n",
    "        rseed = []\n",
    "        \n",
    "        for f in range(0, len(featureDF)):\n",
    "            print(featureE)\n",
    "            print(f)\n",
    "            \n",
    "            train_phq9 = featureDF[f]\n",
    "            X_test = testDFs[f]\n",
    "            \n",
    "            # upsampling \n",
    "            #Count 1s and 0s\n",
    "            ones = len(train_phq9.loc[train_phq9['target'] == 1])\n",
    "            zeros = len(train_phq9.loc[train_phq9['target'] == 0])\n",
    "            if ones >= zeros:\n",
    "                majority = 1\n",
    "                minority = 0\n",
    "            else:\n",
    "                majority = 0\n",
    "                minority = 1\n",
    "\n",
    "            \n",
    "            # Upsample TrainingSet \n",
    "            train_majority = train_phq9[train_phq9.target==majority]\n",
    "            train_minority = train_phq9[train_phq9.target==minority]\n",
    "\n",
    "            # Upsample minority class\n",
    "            train_minority_upsampled = resample(train_minority, \n",
    "                                             replace=True,     # sample with replacement\n",
    "                                             n_samples=len(train_majority),    # to match majority class\n",
    "                                             random_state=42) # reproducible results\n",
    "\n",
    "            # Combine majority class with upsampled minority class\n",
    "            train_phq9 = pd.concat([train_majority, train_minority_upsampled])\n",
    "            \n",
    "            #seperate features and target\n",
    "            y_train = train_phq9[\"target\"]\n",
    "            X_train = train_phq9.drop(columns = \"target\")\n",
    "            \n",
    "            \n",
    "            print(X_train.head())\n",
    "            print(X_test.head())\n",
    "            \n",
    "            for r in [1,2,4,8,16,32,64,128,256,512]:\n",
    "                for modelType in modelTypelist:\n",
    "\n",
    "                    #add data to lists\n",
    "                    llist.append(label)\n",
    "                    featureList.append(f +1)\n",
    "                    flist.append(featureE)\n",
    "                    mlist.append(modelType)\n",
    "\n",
    "                    #chose model type\n",
    "                    if modelType == \"SVC1\":\n",
    "                        clf = svm.SVC(kernel='rbf', random_state=r)\n",
    "                    elif modelType == \"SVC2\":\n",
    "                        clf = svm.SVC(kernel='linear', random_state=r)\n",
    "                    elif modelType == \"RF\":\n",
    "                        clf = RandomForestClassifier(criterion=\"gini\", max_depth=3, random_state=r)\n",
    "                    elif modelType == \"kNN3\":\n",
    "                        clf = KNeighborsClassifier(n_neighbors=3)\n",
    "                    elif modelType == \"kNN5\":\n",
    "                        clf = KNeighborsClassifier(n_neighbors=5)\n",
    "                    elif modelType == \"XG\":\n",
    "                        clf = xgb.XGBClassifier(max_depth=3, random_state=r)\n",
    "                    elif modelType == \"LR\":\n",
    "                        clf = LogisticRegression(random_state=r)\n",
    "                    elif modelType == \"NB\":\n",
    "                        clf = GaussianNB()\n",
    "\n",
    "\n",
    "                    #train model and make predictions\n",
    "                    clf.fit(X_train, y_train)\n",
    "                    y_pred = clf.predict(X_test)\n",
    "\n",
    "                    #evaluate model\n",
    "                    conf_mat = confusion_matrix(list(testdata[label]), y_pred)\n",
    "                    TN = conf_mat[0][0]\n",
    "                    TP = conf_mat[1][1]\n",
    "                    FP = conf_mat[0][1]\n",
    "                    FN = conf_mat[1][0]\n",
    "                    precision = TP/(TP+FP)\n",
    "                    sensitivity = TP/(TP+FN)\n",
    "                    f1 = (2*precision*sensitivity)/(precision + sensitivity)\n",
    "                    accuracy = (TP+TN)/(TN+TP+FP+FN)\n",
    "\n",
    "                    #populate lists with results\n",
    "                    f1List.append(f1)\n",
    "                    accuracyList.append(accuracy)\n",
    "                    truePosList.append(TP)\n",
    "                    trueNegList.append(TN)\n",
    "                    falsePosList.append(FP)\n",
    "                    falseNegList.append(FN)\n",
    "                    predictions.append(y_pred)\n",
    "                    rseed.append(r)\n",
    "\n",
    "        resultsDF = pd.DataFrame()\n",
    "        resultsDF[\"label\"] = llist\n",
    "        resultsDF[\"Engineering\"] = flist\n",
    "        resultsDF[\"model\"] = mlist\n",
    "        resultsDF[\"nFeatures\"] = featureList\n",
    "        resultsDF[\"F1\"] = f1List\n",
    "        resultsDF[\"Accuracy\"] = accuracyList\n",
    "        resultsDF[\"truePos\"] = truePosList\n",
    "        resultsDF[\"trueNeg\"] = trueNegList\n",
    "        resultsDF[\"falsePos\"] = falsePosList\n",
    "        resultsDF[\"falseNeg\"] = falseNegList\n",
    "        resultsDF[\"predictions\"] = predictions\n",
    "        resultsDF[\"randomSeed\"] = rseed\n",
    "\n",
    "        resultsDF.to_csv(\"resultsAll/scripted\" + label + featureE + \"XG.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
