{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 274,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "1pKM2T0vClgw",
    "outputId": "889dcbd8-dd81-46ec-ace4-19301d56c4e0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
     ]
    }
   ],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 275,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "2TA5bxYtD88e",
    "outputId": "27e62326-8016-4733-b1d4-d284f8106c75"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: portalocker>=2.0.0 in /usr/local/lib/python3.10/dist-packages (2.8.2)\n"
     ]
    }
   ],
   "source": [
    "pip install 'portalocker>=2.0.0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 279,
   "metadata": {
    "id": "GF9TC2nTdQ9s"
   },
   "outputs": [],
   "source": [
    "import torchtext\n",
    "import torch\n",
    "import numpy as np\n",
    "from torchtext.vocab import GloVe\n",
    "from torchtext.data import get_tokenizer\n",
    "\n",
    "global_vectors = GloVe(name='6B', dim=300)\n",
    "\n",
    "tokenizer = get_tokenizer(\"basic_english\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1z1CHc2McCbr"
   },
   "source": [
    "# Computer vs. Sports and Politics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "metadata": {
    "id": "6J2UxxNTVf7F"
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_20newsgroups\n",
    "#from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "categories_train = ['comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x']\n",
    "newsgroups_train = fetch_20newsgroups(subset='train', categories=categories_train, random_state=None)\n",
    "\n",
    "# vectorizer = TfidfVectorizer()\n",
    "# vectors = vectorizer.fit_transform(newsgroups_train.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {
    "id": "Gk3L7B0WZQdF"
   },
   "outputs": [],
   "source": [
    "embeddings_Computer_train = np.zeros((2936,300))\n",
    "labels_Computer_train = newsgroups_train.target\n",
    "\n",
    "i = 0\n",
    "\n",
    "for line in newsgroups_train.data:\n",
    "    embedding = global_vectors.get_vecs_by_tokens(tokenizer(line), lower_case_backup=True)\n",
    "    embedding_mean = torch.mean(embedding,0,True)\n",
    "    embeddings_Computer_train[i,:] = embedding_mean\n",
    "\n",
    "    i += 1\n",
    "\n",
    "#print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "R79StqrLY3CQ",
    "outputId": "7eca137b-7542-4970-f87c-e90008f6a8a4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2936, 300)\n",
      "(2936,)\n",
      "[0 1 4 ... 2 4 1]\n"
     ]
    }
   ],
   "source": [
    "latent_train = embeddings_Computer_train\n",
    "y = labels_Computer_train\n",
    "print(latent_train.shape)\n",
    "print(y.shape)\n",
    "print(y)\n",
    "# np.random.shuffle(y)\n",
    "# print(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Y1MW2w-LZPJ2",
    "outputId": "5adaeb55-a26a-4293-c344-51e6e6e05cfc"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: PIMS in /usr/local/lib/python3.10/dist-packages (0.6.1)\n",
      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from PIMS) (2.31.3)\n",
      "Requirement already satisfied: numpy>=1.19 in /usr/local/lib/python3.10/dist-packages (from PIMS) (1.23.5)\n",
      "Requirement already satisfied: slicerator>=0.9.8 in /usr/local/lib/python3.10/dist-packages (from PIMS) (1.1.0)\n",
      "Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.10/dist-packages (from imageio->PIMS) (9.4.0)\n"
     ]
    }
   ],
   "source": [
    "pip install PIMS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {
    "id": "1U4NdQKhc_ji"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# importing relevant libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy as sp\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
    "from sklearn.model_selection import cross_val_predict, StratifiedKFold\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, auc#plot_precision_recall_curve\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from tqdm import tqdm\n",
    "import sklearn\n",
    "#from umap import UMAP\n",
    "#from pynndescent import NNDescent\n",
    "\n",
    "#from fastcluster import single\n",
    "from scipy.cluster.hierarchy import cut_tree, fcluster, dendrogram\n",
    "from scipy.spatial.distance import squareform\n",
    "from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier\n",
    "from pims import ImageSequence\n",
    "from PIL import Image\n",
    "from scipy.spatial.distance import hamming\n",
    "\n",
    "# turning off automatic plot showing, and setting style\n",
    "plt.style.use('bmh')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {
    "id": "R0_lvNJxc_lb"
   },
   "outputs": [],
   "source": [
    "et = ExtraTreesClassifier(n_estimators=100, min_samples_leaf=10,\n",
    "                          max_features=\"sqrt\", bootstrap=True, class_weight='balanced', n_jobs=-1)\n",
    "\n",
    "# et = RandomForestClassifier(n_estimators=100, min_samples_leaf=10,\n",
    "#                           max_features=None, bootstrap=True, class_weight='balanced', n_jobs=-1)\n",
    "\n",
    "# validation instance\n",
    "skf = StratifiedKFold(n_splits=5, shuffle=True)\n",
    "\n",
    "# getting the model validation predictions\n",
    "preds = cross_val_predict(et, latent_train, y, cv=skf, method='predict_proba')\n",
    "\n",
    "# evaluating the model\n",
    "#print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 92
    },
    "id": "OIoEy_y2c_nb",
    "outputId": "f85cc5c0-8cd0-44ca-c66a-2b04c96d9065"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-14 {color: black;background-color: white;}#sk-container-id-14 pre{padding: 0;}#sk-container-id-14 div.sk-toggleable {background-color: white;}#sk-container-id-14 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-14 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-14 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-14 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-14 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-14 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-14 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-14 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-14 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-14 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-14 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-14 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-14 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-14 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-14 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-14 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-14 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-14 div.sk-item {position: relative;z-index: 1;}#sk-container-id-14 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-14 div.sk-item::before, #sk-container-id-14 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-14 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-14 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-14 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-14 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-14 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-14 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-14 div.sk-label-container {text-align: center;}#sk-container-id-14 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-14 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-14\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" checked><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ExtraTreesClassifier</label><div class=\"sk-toggleable__content\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
       "                     min_samples_leaf=10, n_jobs=-1)"
      ]
     },
     "execution_count": 286,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "et.fit(latent_train,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ma5X2GYSdJ1n",
    "outputId": "ae7813fb-1e47-4018-e532-8c230a2a0360"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2936, 100)\n",
      "[[121 188 106 ...  15 203 235]\n",
      " [203 231  81 ... 152  17  61]\n",
      " [212 263 165 ... 130 113 190]\n",
      " ...\n",
      " [153  78 231 ...  38   9 154]\n",
      " [239 189 123 ... 119  18 171]\n",
      " [185  60  73 ...  46 245  20]]\n",
      "0.992212024048099\n",
      "3.470153708164148e-07\n"
     ]
    }
   ],
   "source": [
    "leaves_train = et.apply(latent_train)\n",
    "print(leaves_train.shape)\n",
    "print(leaves_train)\n",
    "\n",
    "distances_train = np.zeros((500,500))\n",
    "\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_train[i,j] = hamming(leaves_train[i,:], leaves_train[j,:])\n",
    "\n",
    "score_train = sum(distances_train)/499\n",
    "\n",
    "print(np.mean(score_train))\n",
    "print(np.cov(score_train))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XHYoZRRGff6G"
   },
   "source": [
    "## Testing on In-Distribution (ID) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GZ_CNk6mflEl"
   },
   "source": [
    "### Computer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {
    "id": "kgbOSWxgdJ3r"
   },
   "outputs": [],
   "source": [
    "categories_test = ['comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x']\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 289,
   "metadata": {
    "id": "aLgdadSkdJ5e"
   },
   "outputs": [],
   "source": [
    "embeddings_Computer_test = np.zeros((500,300))\n",
    "labels_Computer_test = newsgroups_test.target\n",
    "\n",
    "i = 0\n",
    "\n",
    "for line in newsgroups_test.data:\n",
    "    if i >= 500:\n",
    "        break\n",
    "    embedding = global_vectors.get_vecs_by_tokens(tokenizer(line), lower_case_backup=True)\n",
    "    embedding_mean = torch.mean(embedding,0,True)\n",
    "    embeddings_Computer_test[i,:] = embedding_mean\n",
    "    i += 1\n",
    "\n",
    "#print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "xziZBpigc_pQ",
    "outputId": "8f1cfc27-ac45-47e6-974b-4b0d380c0cde"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[212  90 144 ... 123 267 198]\n",
      " [ 95 117 268 ... 258 223 132]\n",
      " [ 46   5 143 ... 183 179  27]\n",
      " ...\n",
      " [266 244  84 ... 248  36 258]\n",
      " [105 110 241 ... 105  87 154]\n",
      " [159  40 165 ...  38  43  47]]\n",
      "0.991432625250504\n",
      "5.661251040519464e-07\n"
     ]
    }
   ],
   "source": [
    "latent_test_in = embeddings_Computer_test\n",
    "\n",
    "leaves_test_in = et.apply(latent_test_in)\n",
    "print(leaves_test_in.shape)\n",
    "print(leaves_test_in)\n",
    "\n",
    "distances_test_in = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_in[i,j] = hamming(leaves_test_in[i,:], leaves_test_in[j,:])\n",
    "\n",
    "score_test_in = sum(distances_test_in)/499\n",
    "\n",
    "print(np.mean(score_test_in))\n",
    "print(np.cov(score_test_in))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TEoyGEgHkysM"
   },
   "source": [
    "## Testing on Out-of-Distribution (OOD) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "R15ODdBUfMyK"
   },
   "source": [
    "### Sports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {
    "id": "hj6TMhfaeuzs"
   },
   "outputs": [],
   "source": [
    "categories_test = ['rec.sport.baseball', 'rec.sport.hockey']\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {
    "id": "5ghi1Rmteu1_"
   },
   "outputs": [],
   "source": [
    "embeddings_Sports_test = np.zeros((500,300))\n",
    "labels_Sports_test = newsgroups_test.target\n",
    "\n",
    "i = 0\n",
    "\n",
    "for line in newsgroups_test.data:\n",
    "    if i>= 500:\n",
    "        break\n",
    "    embedding = global_vectors.get_vecs_by_tokens(tokenizer(line), lower_case_backup=True)\n",
    "    embedding_mean = torch.mean(embedding,0,True)\n",
    "    embeddings_Sports_test[i,:] = embedding_mean\n",
    "    i += 1\n",
    "\n",
    "#print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "d1HAhvjyeu4T",
    "outputId": "87e1f4bb-f856-4c25-baa3-5b907100d0de"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 82 158  89 ... 112 129  87]\n",
      " [215 204 184 ... 225 268  71]\n",
      " [215 268 224 ... 225 251  84]\n",
      " ...\n",
      " [ 15 170 229 ... 256 257  77]\n",
      " [239 176 204 ...  26  20 168]\n",
      " [ 32 204 246 ... 225  79 225]]\n",
      "0.9662597194388802\n",
      "0.00013963816500919878\n"
     ]
    }
   ],
   "source": [
    "latent_test_out_Sports = embeddings_Sports_test\n",
    "\n",
    "leaves_test_out_Sports = et.apply(latent_test_out_Sports)\n",
    "print(leaves_test_out_Sports.shape)\n",
    "print(leaves_test_out_Sports)\n",
    "\n",
    "distances_test_out_Sports = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Sports[i,j] = hamming(leaves_test_out_Sports[i,:], leaves_test_out_Sports[j,:])\n",
    "\n",
    "score_test_out_Sports = sum(distances_test_out_Sports)/499\n",
    "\n",
    "print(np.mean(score_test_out_Sports))\n",
    "print(np.cov(score_test_out_Sports))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "LOfVg_vl5rfc"
   },
   "source": [
    "### Politics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {
    "id": "4i4uK9rq5uc0"
   },
   "outputs": [],
   "source": [
    "categories_test = ['talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc']\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {
    "id": "P5HEAtlY5ue6"
   },
   "outputs": [],
   "source": [
    "embeddings_Politics_test = np.zeros((500,300))\n",
    "labels_Politics_test = newsgroups_test.target\n",
    "\n",
    "i = 0\n",
    "\n",
    "for line in newsgroups_test.data:\n",
    "    if i>= 500:\n",
    "        break\n",
    "    embedding = global_vectors.get_vecs_by_tokens(tokenizer(line), lower_case_backup=True)\n",
    "    embedding_mean = torch.mean(embedding,0,True)\n",
    "    embeddings_Politics_test[i,:] = embedding_mean\n",
    "    i += 1\n",
    "\n",
    "#print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "R6ts8j2e5uhN",
    "outputId": "d47da741-4450-49e4-e3bc-9be235a047b6"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 34 151 167 ... 111 223  97]\n",
      " [248 270 229 ... 226 257  82]\n",
      " [152  67 229 ... 208 251  45]\n",
      " ...\n",
      " [153 263 161 ... 183 144  80]\n",
      " [215 268 187 ... 226  99  78]\n",
      " [ 19   5 145 ... 208 223  38]]\n",
      "0.9622487374749523\n",
      "0.0002168489247184269\n"
     ]
    }
   ],
   "source": [
    "latent_test_out_Politics = embeddings_Politics_test\n",
    "\n",
    "leaves_test_out_Politics = et.apply(latent_test_out_Politics)\n",
    "print(leaves_test_out_Politics.shape)\n",
    "print(leaves_test_out_Politics)\n",
    "\n",
    "distances_test_out_Politics = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Politics[i,j] = hamming(leaves_test_out_Politics[i,:], leaves_test_out_Politics[j,:])\n",
    "\n",
    "score_test_out_Politics = sum(distances_test_out_Politics)/499\n",
    "\n",
    "print(np.mean(score_test_out_Politics))\n",
    "print(np.cov(score_test_out_Politics))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "O2sjOYWZDluL"
   },
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 517
    },
    "id": "xEdmoavjAX9W",
    "outputId": "5df46350-8fad-4fe6-cbb8-d1d5307a1684"
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAz4AAAH0CAYAAAD8J/gjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABaAUlEQVR4nO3de3iU9Z3//9fkNJNkEpKQE4QcyDkpGlpRfmhXvmxpobF+qVcPdOsWjN9SZMVeLm25iotKbS1re0lxWbdSv0X90quta6Vuj7g2i7SWFC1qtM2ZkAMhRwLkPElm7t8fltGRBCeIfnInz8d15XIzuefOaz575915cc/c47AsyxIAAAAAzGAhpgMAAAAAwHuN4gMAAABgxqP4AAAAAJjxKD4AAAAAZjyKDwAAAIAZj+IDAAAAYMaj+AAAAACY8cJMB7gUPp9Po6OjCg0NlcPhMB0HAAAAgCGWZcnr9SoiIkIhIZOf17Fl8RkdHdUf//hH0zEAAAAATBPXXXedXC7XpD+3ZfEJDQ2VJC1atMj/f2P2qampUWFhoekYAAxiDgCQmAWzndfr1V/+8pd37AW2LD7nX94WGhpK8ZnFUlNT+f8/MMsxBwBIzAK84Z3eAsPFDWBblmWZjgDAMOYAAIlZgOBQfGBbXV1dpiMAMIw5AEBiFiA4FB8AAAAAMx7FB7aVl5dnOgIAw5gDACRmAYJD8YFtnTx50nQEAIYxBwBIzAIEh+ID2xoeHjYdAYBhzAEAErMAwaH4wLYu9gFVAGYH5gAAiVmA4FB8YFvp6emmIwAwjDkAQGIWIDgUH9hWfX296QgADGMOAJCYBQgOxQcAAADAjEfxgW0lJyebjgDAMOYAAIlZgOBQfGBbISEcvsBsxxwAIDELEByOEthWR0eH6QgADGMOAJCYBQgOxQcAAADAjEfxgW3l5OSYjgDAMOYAAIlZgOBQfGBbnNYGwBwAIDELEByKD2xrcHDQdAQAhjEHAEjMAgQnzHQA4FI5nU7TEQAYxhwA7GdoaOiyfuDoyMiI/vznP6u/v18ul+uy7DMvL09RUVGXZV+YPig+sK2srCzTEQAYxhwA7Ke+vl4rVqwwHeOiDh06pJKSEtMxcJlRfGBbtbW1Ki4uNh0DgEHMAcB+8vLydOjQocu2v7q6Om3cuFF79+5Vfn7+ZdlnXl7eZdkPpheKDwAAAN43UVFR78nZlPz8fM7S4KK4uAFsKzEx0XQEAIYxBwAAwaL4wLYiIiJMRwBgGHMAABAsig9s69SpU6YjADCMOQAACBbFBwAAAMCMR/GBbS1cuNB0BACGMQcAAMGi+MC2uru7TUcAYBhzAAAQLIoPbGtgYMB0BACGMQcAAMGi+MC2wsPDTUcAYBhzAAAQLIoPbCs3N9d0BACGMQcAAMGi+MC2qqurTUcAYBhzAAAQLIoPAAAAgBmP4gPbSkhIMB0BgGHMAQBAsCg+sK3IyEjTEQAYxhwAAASL4gPbamtrMx0BgGHMAQBAsMJMB8DsMTQ0pPr6+suyr5GREVVUVKi/v18ul+uy7DMvL09RUVGXZV8AAACYXig+eN/U19drxYoVpmNM6tChQyopKTEdA8AUZGVlmY4AALAJig/eN3l5eTp06NBl2VddXZ02btyovXv3Kj8//7LsMy8v77LsB8D7p7e3lzO1AICgUHzwvomKirrsZ1Ty8/M5SwPMYn19faYjAABsgosbAABsKzQ01HQEAIBNcMYHAPC+udwXOWlpadHZs2cv20VOJC50AgAzFcUHAPC+me4XOZG40AkAzFQUHwDA+2a6X+RE4kInADBTUXxwUcePH9fAwIDpGBeoq6sL+O9043a7lZOTYzoGMO1wkRMAgCkUH0zq+PHjuvrqq03HuKiNGzeajjCpl156ifIDAAAwTVB8MKn+nl5lOZz6l3/5F2VmZJqOE8Az6tFrr72uK6+8Qs4Ip+k4AZpbmnX//ferv6dXovgAAABMCxQf
TGq8tUPfDl8ofefH6jEdZgILJfXrOfWbDvI20ZK+Hb5Q460d0lLTaQAAACBRfHARYempumvshH6w9wfKu4xvHL5choeGFDkNLzlbX1enL238kvalp5qOAgAAgL+h+GBSI5ZXTZZHVUNnFG6NmI4TYGRkRJWvVaqkpOSyfn7H5XDCGlGT5ZHDFWE6CgAAAP6G4oNJnb9i2p133mk2iE253W7TEQAAAPA3FB9M6oYbbpD0xqViIyMjDacJ9F59fsflwuWsAQAAppdLKj4PP/ywvvvd76qjo0MlJSXas2ePrrnmmgm3HRsb086dO/XEE0+ora1NBQUFeuCBB7R69Wr/Nv39/br77rv185//XF1dXfrgBz+ohx56aNpfSnmmmzt3rtatW2c6xkXx+R0AAAAIRshU7/Dkk09qy5Ytuvfee/Xyyy+rpKREq1atUldX14Tbb9++XXv37tWePXtUVVWl2267TTfddJNeeeUV/zZf/OIX9dxzz2n//v16/fXX9bGPfUwrV65UW1vbpT8yAAAAAPibKRefXbt2acOGDSorK1NxcbEeeeQRRUVFad++fRNuv3//ft11110qLS1Vdna2Nm3apNLSUj344IOSpOHhYT399NP6zne+o+uvv165ubnasWOHcnNz9f3vf//dPToAAAAA0BSLz+joqI4dO6aVK1e+uYOQEK1cuVIVFRUT3sfj8Vxw1a3IyEi98MILkqTx8XF5vd6LbjOZ/v5+9fX1+b88Hs9UHg4AAACAWWJK7/Hp6emR1+tVSkpKwO0pKSmqqamZ8D6rVq3Srl27dP311ysnJ0fl5eU6cOCAvF6vJCkmJkbLli3TN7/5TRUVFSklJUU/+clPVFFRodzc3IvmWbRokYaGhvzfl5WV6Y477tC8efN0/PhxfzbLsvwvxcvLy9PJkyc1PDwsl8ul9PR01dfXS5KSk5MVEhKijo4OSVJOTo46Ojo0ODgop9OprKws1dbWSpISExMVERGhU6dOSZIWLlyo7u5uDQwMKDw8XLm5uaqurpYkJSQkKDIy0v/SvaysLPX29qqvr0+hoaEqKChQdXW1LMtSXFycYmJi1NraKknKyMhQX1+fzp49K4fDoaKiItXW1srr9So2Nlbx8fFqbm6WJC1YsEBDQ0Pq7e2VJBUXF6uurk7j4+OKiYlRYmKiTpw4IUmaP3++PB6PTp8+LUkqLCxUY2OjRkdHFR0drZSUFDU2NkqS5s2bp/HxcXV3d0t64301LS0tGhkZUWRkpNLS0tTQ0OBfb0nq7OyUJOXm5qqtrc2/3hkZGf6rxSUlJSksLEzt7e2SpOzsbHV2dmpwcFARERHKzs72H1dz586V0+n0r/f5ktvY2KjIyEjl5+erqqrKv95RUVE6efKkJCkzM1NnzpyZdL1jY2PV0tIiSUpPT1d/f/+k652QkKCmpiZJUlpamoaHh/3rXVRUpIaGBo2NjcntdispKSlgvUdHR9XT88ZHwRYUFKipqUkej0fR0dFKTU31H7Opqany+XwBx2xra6t/vRcsWBBwzDocDv965+TkqL29XUNDQ3I6ncrMzLzoend1dWlgYGDC9Xa5XBMes2FhYQHrHR8fL7fbHXDMnjt3TufOnVNISIgKCwtVU1Mjn8+nOXPmaM6cOQHrPTAwoDNnzlxwzE603iMjIxMes263W8nJyRc9Zpubm+XxeBQVFcWMmCEzwufzSXpjDqSmpgbMiIULF6qnp0f9/f0XHLPMCGYEM2JmzYjzv8fj8ai5uTmo5xHMiJk1I86v4TtxWJZlBbWlpFOnTiktLU1HjhzRsmXL/Ldv3bpVhw8f1tGjRy+4T3d3tzZs2KBf/vKXcjgcysnJ0cqVK7Vv3z4NDw9Lko4fP65bb71Vv//97xUaGqoPfehDys/P17Fjx/x/9G81Pj6uw4cPKzs7WyEhb560cjqdcjqdwT4cvM+Ghob8f2jvhtfr1W9+8xvt2rVLW7ZsUWlpqUJDQ9/1fvPy8hQ1DT8Q
FcDEKisrtWLFCh06dIiLnACzGLMAXq9XlZWVWr58ucLCJj+vM6UzPomJiQoNDfW3wvM6OzuVmjrxp9QnJSXpmWee8f8rzPz58/X1r39d2dnZ/m1ycnJ0+PBhDQ4Oqq+vT/PmzdPatWsDtplITEzMZXnCi/dHfX29VqxYcVn3uWvXLu3ateuy7IuBCQAAMHNNqfhEREToqquuUnl5uT75yU9Kknw+n8rLy7V58+aL3tflciktLU1jY2N6+umn9dnPfvaCbaKjoxUdHa0zZ87o2Wef1Xe+852pxMM0l5eXp0OHDl3y/X//+99rx44dWrZsmT796U9rbGxM4eHh+tnPfqaKigrt2LFD119//bvKBwAAgJlpyp/js2XLFq1fv15LlizRNddco927d2twcFBlZWWSpHXr1iktLU07d+6UJB09elRtbW1avHix2tratGPHDvl8Pm3dutW/z2effVaWZamgoEANDQ362te+psLCQv8+MTNERUVd8hkVr9er9evXa9WqVdq3b58ee+wx/fWvf9UHPvAB/exnP9Ott96qffv26Z/+6Z84CwgAAIALTLn4rF27Vt3d3brnnnvU0dGhxYsX6+DBg/43o7W0tAS872ZkZETbt29XY2Oj3G63SktLtX//fsXFxfm3OXfunLZt26aTJ08qISFBn/rUp3T//fcrPDz83T9CzAgVFRVqaWnR0qVLlZ6e7r84hiTdc889uummm9Tc3KyKigp9+MMfNpgUAAAA09GUi48kbd68edKXtj3//PMB3y9fvtx/1YbJfPazn53wpW/AeeffV/bUU08pKSlJ//Iv/6KFCxfqxIkTuv/++/Wzn/0sYDsAAADgrab8AaaACQkJCZKkuLg4vfbaa/5LKGZnZ+u1117zn0E8vx0AAADwVpd0xgd4v50/axgTE6OlS5f6r98uvXG995iYGJ09e1ZVVVWX/cpxAAAAsD+KD2zh/Iertba2KikpSd/73veUnZ2txsZGffvb3/Z/KNr57QAAAIC3ovjAFjIzMyW98Qm9kvTP//zP/p9lZGQoPT1dra2t/u0AAACAt6L4wBaKi4slSf39/aqurtZLL72k1tZWpaen6+qrr1ZRUVHAdgAAAMBbcXED2EJvb68k6ezZs7ryyivV0NCgtLQ0NTQ06Morr9TZs2cDtgMAAADeijM+sIXznxP1mc98RgcOHNCWLVv8PwsLC9OnP/1p/exnP/NvBwAAALwVxQe2sGzZMmVkZKi/v1+tra167LHHVFNTo8LCQpWVlenWW29VZmamli1bZjoqAAAApiGKD2whNDRU3/zmN3XLLbfo1ltv1Z133qlVq1app6dHt956q5599lk9/vjjCg0NNR0VAAAA0xDFB7Zx44036vHHH9fdd9+t1atX+2/PzMzU448/rhtvvNFgOgAAAExnFB/Yyo033qjS0lJVVFTo2LFjuuqqq7Rs2TLO9AAAAOCiKD6wndDQUH34wx/Wtddeq5AQLkwIAACAd8azRthWY2Oj6QgAAACwCYoPbGt0dNR0BAAAANgExQe2FR0dbToCAAAAbILiA9viw0oBAAAQLIoPbIv3+AAAACBYFB8AAAAAMx7FB7Y1b9480xEAAABgExQf2Nb4+LjpCAAAALAJig9sq7u723QEAAAA2ATFBwAAAMCMR/GBbeXn55uOAAAAAJug+MC2WlpaTEcAAACATVB8YFsjIyOmIwAAAMAmKD6wrcjISNMRAAAAYBMUH9hWWlqa6QgAAACwCYoPbKuhocF0BAAAANgExQcAAADAjEfxgW2lpKSYjgAAAACboPgAAAAAmPEoPrCtzs5O0xEAAABgExQfAAAAADMexQe2lZubazoCAAAAbILiA9tqa2szHQEAAAA2QfGBbQ0PD5uOAAAAAJug+MC2XC6X6QgAAACwCYoPbCsjI8N0BAAAANgExQe2VVdXZzoCAAAAbILiAwAAAGDGo/jAtpKSkkxHAAAAgE1QfGBbYWFhpiMAAADA
Jig+sK329nbTEQAAAGATFB8AAAAAMx7FB7aVnZ1tOgIAAABsguID2+rs7DQdAQAAADZB8YFtDQ4Omo4AAAAAm6D4wLYiIiJMRwAAAIBNXFLxefjhh5WVlSWXy6WlS5fqxRdfnHTbsbEx3XfffcrJyZHL5VJJSYkOHjwYsI3X69Xdd9+thQsXKjIyUjk5OfrmN78py7IuJR5mCd7jAwAAgGBNufg8+eST2rJli+699169/PLLKikp0apVq9TV1TXh9tu3b9fevXu1Z88eVVVV6bbbbtNNN92kV155xb/NAw88oO9///v693//d1VXV+uBBx7Qd77zHe3Zs+fSHxlmvJqaGtMRAAAAYBNTLj67du3Shg0bVFZWpuLiYj3yyCOKiorSvn37Jtx+//79uuuuu1RaWqrs7Gxt2rRJpaWlevDBB/3bHDlyRGvWrNENN9ygrKwsffrTn9bHPvaxi55JAgAAAIBgTan4jI6O6tixY1q5cuWbOwgJ0cqVK1VRUTHhfTwej1wuV8BtkZGReuGFF/zfX3vttSovL1ddXZ0kqbKyUi+88II+/vGPXzRPf3+/+vr6/F8ej2cqDwc2N3fuXNMRAAAAYBNhU9m4p6dHXq9XKSkpAbenpKRM+rKjVatWadeuXbr++uuVk5Oj8vJyHThwQF6v17/N17/+dfX19amwsFChoaHyer26//77dfPNN180z6JFizQ0NOT/vqysTHfccYfmzZun48eP+7NZluV/KV5eXp5Onjyp4eFhuVwupaenq76+XpKUnJyskJAQdXR0SJJycnLU0dGhwcFBOZ1OZWVlqba2VpKUmJioiIgInTp1SpK0cOFCdXd3a2BgQOHh4crNzVV1dbUkKSEhQZGRkWpra5MkZWVlqbe3V319fQoNDVVBQYGqq6tlWZbi4uIUExOj1tZWSVJGRob6+vp09uxZORwOFRUVqba2Vl6vV7GxsYqPj1dzc7MkacGCBRoaGlJvb68kqbi4WHV1dRofH1dMTIwSExN14sQJSdL8+fPl8Xh0+vRpSVJhYaEaGxs1Ojqq6OhopaSkqLGxUZI0b948jY+Pq7u7W5KUn5+vlpYWjYyMKDIyUmlpaWpoaPCvt/TmpaZzc3PV1tbmX++MjAx/wU1KSlJYWJja29slvfGenc7OTg0ODioiIkLZ2dn+42ru3LlyOp0B693X16fTp08rLCxM+fn5qqqq8q93VFSUTp48KUnKzMzUmTNnJl3v2NhYtbS0SJLS09PV398/6XonJCSoqalJkpSWlqbh4WH/ehcVFamhoUFjY2Nyu91KSkoKWO/R0VH19PRIkgoKCtTU1CSPx6Po6Gilpqb6j9nU1FT5fL6AY7a1tdW/3gsWLAg4Zh0Oh3+9c3Jy1N7erqGhITmdTmVmZl50vbu6ujQwMDDhertcrgmP2bevd3x8vNxud8Axe+7cOZ07d04hISEqLCxUTU2NfD6f5syZozlz5gSs98DAgM6cOXPBMTvReo+MjEx4zLrdbiUnJ1/0mG1ubpbH41FUVBQzYobMCJ/PJ0lqbGxUamrqBTOip6dH/f39zAhmBDNihs+I87/H4/Goubk56OcRzIiZMyPOr+E7cVhTuILAqVOnlJaWpiNHjmjZsmX+27du3arDhw/r6NGjF9ynu7tbGzZs0C9/+Us5HA7l5ORo5cqV2rdvn4aHhyVJP/3pT/W1r31N3/3ud/WBD3xAr776qu68807t2rVL69evv2Cf4+PjOnz4sLKzsxUS8uZJK6fTKafTGezDgc1VVVWpuLjYdAwAhlRWVmrFihU6dOiQSkpKTMcBYAizAF6vV5WVlVq+fLnCwiY/rzOlMz6JiYkKDQ294IMjOzs7lZqaOuF9kpKS9Mwzz/j/FWb+/Pn6+te/HnBFrq997Wv6+te/rs997nOSpCuuuELNzc3auXPnhMXnvJiYGIWGhk7lIQAAAACYhab0Hp+IiAhdddVVKi8v99/m8/lUXl4ecAZoIi6XS2lp
aRofH9fTTz+tNWvW+H82NDQUcOZGkkJDQ/0vYwAmsnDhQtMRAAAAYBNTOuMjSVu2bNH69eu1ZMkSXXPNNdq9e7cGBwdVVlYmSVq3bp3S0tK0c+dOSdLRo0fV1tamxYsXq62tTTt27JDP59PWrVv9+7zxxht1//33KyMjQx/4wAf0yiuvaNeuXbr11lsv08PETNTT06P09HTTMQAAAGADUy4+a9euVXd3t+655x51dHRo8eLFOnjwoP/NaC0tLQFnb0ZGRrR9+3Y1NjbK7XartLRU+/fvV1xcnH+bPXv26O6779Y//dM/qaurS/Pnz9fGjRt1zz33vPtHiBmrv7/fdAQAAADYxJSLjyRt3rxZmzdvnvBnzz//fMD3y5cv91+1YTIxMTHavXu3du/efSlxMEtd7M1rAAAAwFtN+QNMgekiPz/fdAQAAADYBMUHtvVOZxIBAACA8yg+AAAAAGY8ig9sKyEhwXQEAAAA2ATFB7YVFRVlOgIAAABsguID2zp58qTpCAAAALAJig8AAACAGY/iA9vKzMw0HQEAAAA2QfGBbZ05c8Z0BAAAANgExQe21dfXZzoCAAAAbILiA9sKDQ01HQEAAAA2QfGBbRUUFJiOAAAAAJug+MC2qqurTUcAAACATYSZDgBcKsuyTEcAZoXjx49rYGDAdIwL1NXVBfx3unG73crJyTEdAwDwNxQf2FZcXJzpCMCMd/z4cV199dWmY1zUxo0bTUeY1EsvvUT5AYBpguID24qNjTUdAZjxzp/p2bt3r/Lz8w2nCTQyMqK6ujrl5+fL5XKZjhOgrq5OGzdunJZnygBgtqL4wLZaWlpUXFxsOgYwK+Tn56ukpMR0jAvExMQwBwAAQeHiBgAAAABmPIoPbCs9Pd10BACGMQcAAMGi+MC2+vv7TUcAYBhzAAAQLIoPbOvs2bOmIwAwjDkAAAgWxQe25XA4TEcAYBhzAAAQLIoPbKuoqMh0BACGMQcAAMGi+MC2amtrTUcAYBhzAAAQLIoPbMvr9ZqOAMAw5gAAIFgUH9hWbGys6QgADGMOAACCRfGBbSUkJJiOAMAw5gAAIFgUH9hWU1OT6QgADGMOAACCRfEBAAAAMOOFmQ4AXKq0tDTTEQAYxhwA3j/Hjx/XwMCA6RgXqKurC/jvdOJ2u5WTk2M6Bv6G4gPbGh4e1pw5c0zHAGAQcwB4fxw/flxXX3216RgXtXHjRtMRJvTSSy9RfqYJig9sq7e3V6mpqaZjADCIOQC8P86f6dm7d6/y8/MNpwk0MjKiiooKLVu2TC6Xy3Qcv7q6Om3cuHFaniWbrSg+AAAACEp+fr5KSkpMx7hATEyMiouLTcfANMfFDWBbRUVFpiMAMIw5AEBiFiA4FB/YVkNDg+kIAAxjDgCQmAUIDsUHtjU2NmY6AgDDmAMAJGYBgkPxgW253W7TEQAYxhwAIDELEByKD2wrKSnJdAQAhjEHAEjMAgSH4gPbOnHihOkIAAxjDgCQmAUIDsUHAAAAwIxH8YFtzZ8/33QEAIYxBwBIzAIEh+ID2xodHTUdAYBhzAEAErMAwaH4wLZ6enpMRwBgGHMAgMQsQHAoPgAAAABmPIoPbKugoMB0BACGMQcASMwCBIfiA9tqamoyHQGAYcwBABKzAMGh+MC2PB6P6QgADGMOAJCYBQgOxQe2FR0dbToCAMOYAwAkZgGCc0nF5+GHH1ZWVpZcLpeWLl2qF198cdJtx8bGdN999yknJ0cul0slJSU6ePBgwDZZWVlyOBwXfN1+++2XEg+zRGpqqukIAAxjDgCQmAUIzpSLz5NPPqktW7bo3nvv1csvv6ySkhKtWrVKXV1dE26/fft27d27V3v27FFVVZVuu+023XTTTXrllVf827z00ktqb2/3fz333HOSpM985jOX+LAwGxw/ftx0BACGMQcASMwCBGfKxWfXrl3asGGDysrKVFxcrEceeURRUVHat2/fhNvv379fd911l0pLS5Wdna1Nmzap
tLRUDz74oH+bpKQkpaam+r9+9atfKScnR8uXL79olv7+fvX19fm/eH0nAAAAgImETWXj0dFRHTt2TNu2bfPfFhISopUrV6qiomLC+3g8HrlcroDbIiMj9cILL0z6O370ox9py5YtcjgcF82zaNEiDQ0N+b8vKyvTHXfcoXnz5vmbf0pKiizL8p+RysvL08mTJzU8PCyXy6X09HTV19dLkpKTkxUSEqKOjg5JUk5Ojjo6OjQ4OCin06msrCzV1tZKkhITExUREaFTp05JkhYuXKju7m4NDAwoPDxcubm5qq6uliQlJCQoMjJSbW1tkt54aV9vb6/6+voUGhqqgoICVVdXy7IsxcXFKSYmRq2trZKkjIwM9fX16ezZs3I4HCoqKlJtba28Xq9iY2MVHx+v5uZmSdKCBQs0NDSk3t5eSVJxcbHq6uo0Pj6umJgYJSYm6sSJE5Kk+fPny+Px6PTp05KkwsJCNTY2anR0VNHR0UpJSVFjY6Mkad68eRofH1d3d7ckKT8/Xy0tLRoZGVFkZKTS0tLU0NDgX29J6uzslCTl5uaqra3Nv94ZGRmqq6uT9EbhDQsLU3t7uyQpOztbnZ2dGhwcVEREhLKzs1VTUyNJmjt3rpxOZ8B6h4WFqaqqSmFhYcrPz1dVVZV/vaOionTy5ElJUmZmps6cOTPpesfGxqqlpUWSlJ6erv7+/knXOyEhwX/lmLS0NA0PD/vXu6ioSA0NDRobG5Pb7VZSUlLAeo+Ojvo/YK2goEBNTU3yeDyKjo5Wamqq/5hNTU2Vz+cLOGZbW1v9671gwYKAY9bhcPjXOycnR+3t7RoaGpLT6VRmZuZF17urq0sDAwMTrrfL5ZrwmH37esfHx8vtdgccs+fOndO5c+cUEhKiwsJC1dTUyOfzac6cOZozZ07Aeg8MDOjMmTMXHLMTrffIyMiEx6zb7VZycvJFj9nm5mZ5PB5FRUUxI6YwI86vqc/nU0NDw7SbEaOjo6qqqppwRvT09Ki/v9/IjDi/NqdPn/b/XmYEM8LOM+L8MdTY2Kjw8PBp9zxidHRUIyMjU3oe8V7PiPN/383NzQoPD+d5xHs4I86v4TtxWJZlBbWlpFOnTiktLU1HjhzRsmXL/Ldv3bpVhw8f1tGjRy+4z+c//3lVVlbqmWeeUU5OjsrLy7VmzRp5vd4Jz9D853/+pz7/+c+rpaVF8+fPnzDH+Pi4Dh8+rOzsbIWEvHnSyul0yul0BvtwYHM9PT1KTEw0HQOY0SorK7VixQodOnRIJSUlpuNcYLrOgem+bsBUTfdjejrOgum+ZjOJ1+tVZWWlli9frrCwyc/rvOdXdXvooYeUl5enwsJCRUREaPPmzSorKwsoLG/1wx/+UB//+McnLT1vFRMTo9jYWP8XpWd2mex9ZQBmD+YAAIlZgOBMqfgkJiYqNDTUfzrsvM7OzkmvppGUlKRnnnlGg4ODam5uVk1Njdxut7Kzsy/Ytrm5Wb/73e/0xS9+cSqxAAAAAOCiplR8IiIidNVVV6m8vNx/m8/nU3l5ecBL3ybicrmUlpam8fFxPf3001qzZs0F2zz22GNKTk7WDTfcMJVYmKXy8vJMRwBgGHMAgMQsQHCm/FK3LVu26NFHH9UTTzyh6upqbdq0SYODgyorK5MkrVu3LuDiB0ePHtWBAwfU2NioP/zhD1q9erV8Pp+2bt0asF+fz6fHHntM69evv+hr84Dzzr8JDsDsxRwAIDELEJwpN4y1a9equ7tb99xzjzo6OrR48WIdPHjQfxWOlpaWgPfvjIyMaPv27WpsbJTb7VZpaan279+vuLi4gP3+7ne/U0tLi2699dZ394gwa4yMjJiOAMAw5gAAiVmA4FzSqZXNmzdr8+bNE/7s+eefD/h++fLl/svVXczHPvYxTeECc4AiIyNNRwBgGHMAgMQsQHDe86u6Ae+VBQsWmI4AwDDmAACJWYDgUHxgW+c/fAvA7MUcACAxCxAcig8AAACAGY/iA9tK
Tk42HQGAYcwBABKzAMGh+MC2HA6H6QgADGMOAJCYBQgOxQe21dnZaToCAMOYAwAkZgGCQ/EBAAAAMONRfGBbOTk5piMAMIw5AEBiFiA4FB/YVnt7u+kIAAxjDgCQmAUIDsUHtjU0NGQ6AgDDmAMAJGYBgkPxgW05nU7TEQAYxhwAIDELEByKD2wrMzPTdAQAhjEHAEjMAgQnzHQA4FLV1dWpuLjYdAxgRrNGRpXlcGqsvkXnHC7TcS5worFRC7OzTce4wFh9i7IcTlkjo6ajALMCzwkQDIoPAGBS460d+nb4QvVs3qke02Em0WE6wCS+Hb5Q460d0lLTSQAAEsUHNpaUlGQ6AjDjhaWn6q6xE/rB3h8oLz/fdJwLnD1zRnHx8aZjXKC+rk5f2vgl7UtPNR0FmBV4ToBgUHxgW2FhHL7Ae83hilCT5VF4XobmXFlgOs4FfGfOaM40LD7h1oiaLI8crgjTUYBZgecECAYXN4Btcc1+AMwBABKzAMGh+AAAAACY8Sg+sK3saXglJwDvL+YAAIlZgOBQfGBbXV1dpiMAMIw5AEBiFiA4FB/Y1sDAgOkIAAxjDgCQmAUIDsUHthURwdWSgNmOOQBAYhYgOBQf2Bav5wXAHAAgMQsQHIoPbKumpsZ0BACGMQcASMwCBIfiAwAAAGDGo/jAtubOnWs6AgDDmAMAJGYBgkPxgW25XC7TEQAYxhwAIDELEByKD2yrra3NdAQAhjEHAEjMAgSH4gMAAABgxqP4wLaysrJMRwBgGHMAgMQsQHAoPrCt3t5e0xEAGMYcACAxCxAcig9sq6+vz3QEAIYxBwBIzAIEh+ID2woLCzMdAYBhzAEAErMAwaH4wLby8/NNRwBgGHMAgMQsQHAoPrCtqqoq0xEAGMYcACAxCxAcig8AAACAGY/iA9uKj483HQGAYcwBABKzAMGh+MC23G636QgADGMOAJCYBQgOxQe21draajoCAMOYAwAkZgGCw7X/AAAAcFHWyKiyHE6N1bfonMNlOs4FRhubdW481HSMAGP1LcpyOGWNjJqOgr+h+MC2MjIyTEcAYBhzAHh/jLd26NvhC9Wzead6TIeZRIfpABP4dvhCjbd2SEtNJ4FE8YGNnTt3jtf0ArMccwB4f4Slp+qusRP6wd4fKG8afmZOd1eXkpKTTccIUF9Xpy9t/JL2paeajoK/ofjAts6dO6e0tDTTMQAYxBwA3h8OV4SaLI/C8zI058oC03Eu0Fbl1Zzi6ZUr3BpRk+WRwxVhOgr+hosbwLZCQjh8gdmOOQBAYhYgOBwlsK3CwkLTEQAYxhwAIDELEByKD2yrpqbGdAQAhjEHAEjMAgSH4gPb8vl8piMAMIw5AEBiFiA4l1R8Hn74YWVlZcnlcmnp0qV68cUXJ912bGxM9913n3JycuRyuVRSUqKDBw9esF1bW5v+8R//UXPnzlVkZKSuuOIK/fnPf76UeJgl5syZYzoCAMOYAwAkZgGCM+Xi8+STT2rLli2699579fLLL6ukpESrVq1SV1fXhNtv375de/fu1Z49e1RVVaXbbrtNN910k1555RX/NmfOnNF1112n8PBw/fa3v1VVVZUefPBBxcfHX/ojw4zHkAPAHAAgMQsQnCkXn127dmnDhg0qKytTcXGxHnnkEUVFRWnfvn0Tbr9//37dddddKi0tVXZ2tjZt2qTS0lI9+OCD/m0eeOABpaen67HHHtM111yjhQsX6mMf+5hycnIu/ZFhxmtpaTEdAYBhzAEAErMAwZlS8RkdHdWxY8e0cuXKN3cQEqKVK1eqoqJiwvt4PB65XK6A2yIjI/XCCy/4v//FL36hJUuW6DOf+YySk5P1wQ9+UI8++ug75unv71dfX5//y+PxTOXhAAAAAJglpvQBpj09PfJ6vUpJSQm4PSUlZdKraaxatUq7du3S9ddfr5ycHJWXl+vAgQPyer3+bRobG/X9739fW7Zs0V133aWXXnpJX/7ylxUREaH169dP
mmfRokUaGhryf19WVqY77rhD8+bN0/Hjx/3ZLMvyvxQvLy9PJ0+e1PDwsFwul9LT01VfXy9JSk5OVkhIiDo6OiRJOTk56ujo0ODgoJxOp7KyslRbWytJSkxMVEREhE6dOiVJWrhwobq7uzUwMKDw8HDl5uaqurpakpSQkKDIyEi1tbVJkrKystTb26u+vj6FhoaqoKBA1dXVsixLcXFxiomJUWtrqyQpIyNDfX19Onv2rBwOh4qKilRbWyuv16vY2FjFx8erublZkrRgwQINDQ2pt7dXklRcXKy6ujqNj48rJiZGiYmJOnHihCRp/vz58ng8On36tKQ3LgPZ2Nio0dFRRUdHKyUlRY2NjZKkefPmaXx8XN3d3ZKk/Px8tbS0aGRkRJGRkUpLS1NDQ4N/vSWps7NTkpSbm6u2tjb/emdkZKiurk6SlJSUpLCwMLW3t0uSsrOz1dnZqcHBQUVERCg7O9t/XM2dO1dOpzNgvSMiIlRVVaWwsDDl5+erqqrKv95RUVE6efKkJCkzM1NnzpyZdL1jY2P9/1KUnp6u/v7+Sdc7ISFBTU1NkqS0tDQNDw/717uoqEgNDQ0aGxuT2+1WUlJSwHqPjo6qp6dHklRQUKCmpiZ5PB5FR0crNTXVf8ympqbK5/MFHLOtra3+9V6wYEHAMetwOPzrnZOTo/b2dg0NDcnpdCozM/Oi693V1aWBgYEJ19vlck14zL59vePj4+V2uwOO2XPnzuncuXMKCQlRYWGhampq5PP5NGfOHM2ZMydgvQcGBnTmzJkLjtmJ1ntkZGTCY9btdis5Ofmix2xzc7M8Ho+ioqKYEVOYEefX1OfzqaGhYdrNiPHxcVVVVU04I3p6etTf329kRpxfm9OnT/t/LzOCGWHnGXH+GGpsbFR4ePi0ex4xPj6ukZGRKT2PeK9nxPm/7+bmZoWHh/M84j2cEefX8J04LMuygtpS0qlTp5SWlqYjR45o2bJl/tu3bt2qw4cP6+jRoxfcp7u7Wxs2bNAvf/lLORwO5eTkaOXKldq3b5+Gh4clSREREVqyZImOHDniv9+Xv/xlvfTSSxOeSRofH9fhw4eVnZ0d8IFVTqdTTqcz2IcDm2tvb9e8efNMxwBmtMrKSq1YsUKHDh1SSUmJ6TgXmK5zYLqvGzBV0/2Yno6zYLqv2Uzi9XpVWVmp5cuXKyxs8vM6U3qpW2JiokJDQ/2t8LzOzk6lpqZOeJ+kpCQ988wzGhwcVHNzs2pqauR2u5Wdne3fZt68eSouLg64X1FR0Tu+XjMmJkaxsbH+L0rP7HL+XwABzF7MAQASswDBmVLxiYiI0FVXXaXy8nL/bT6fT+Xl5QFngCbicrmUlpam8fFxPf3001qzZo3/Z9ddd53/1O95dXV1yszMnEo8AAAAAJjQlN7jI0lbtmzR+vXrtWTJEl1zzTXavXu3BgcHVVZWJklat26d0tLStHPnTknS0aNH1dbWpsWLF6utrU07duyQz+fT1q1b/fv853/+Z1177bX69re/rc9+9rN68cUX9YMf/EA/+MEPLtPDxEz09rOEAGYf5gAAiVmA4Ey5+Kxdu1bd3d2655571NHRocWLF+vgwYP+N6O1tLQEvO9mZGRE27dvV2Njo9xut0pLS7V//37FxcX5t7n66qv185//XNu2bdN9992nhQsXavfu3br55pvf/SPEjFVXV6f8/HzTMQAYxBwAIDELEJwpFx9J2rx5szZv3jzhz55//vmA75cvX+6/asPFfOITn9AnPvGJS4mDWWp8fNx0BACGMQcASMwCBGfKH2AKTBexsbGmIwAwjDkAQGIWIDgUH9hWQkKC6QgADGMOAJCYBQgOxQe2df4DwADMXswBABKzAMGh+AAAAACY8Sg+sK20tDTTEQAYxhwAIDELEByKD2xrZGTEdAQAhjEHAEjMAgTnki5nDUwHp0+f9n9+FID3xvDwsCSpsrLScJILjYyMqKKiQsuWLZPL5TIdJ0BdXZ3pCMCs
wnMCBIPiAwCY1Pkn8HfeeafZIDbldrtNRwAA/A3FB7ZVWFhoOgIw491www2SpPz8fEVGRhpOE6iurk4bN27U3r17p+UntrvdbuXk5JiOAcwKPCdAMCg+sK3Gxkbl5uaajgHMaHPnztW6detMx7io/Px8lZSUmI4BwCCeEyAYXNwAtjU6Omo6AgAAmAZ4ToBgUHxgW7x2HgAASDwnQHAoPrCt5ORk0xEAAMA0wHMCBIPiA9tqbGw0HQEAAEwDPCdAMCg+AAAAAGY8ig9sa968eaYjAACAaYDnBAgGxQe2NT4+bjoCAACYBnhOgGBQfGBb3d3dpiMAAIBpgOcECAbFBwAAAMCMR/GBbeXn55uOAAAApgGeEyAYFB/YVnNzs+kIAABgGuA5AYJB8YFteTwe0xEAAMA0wHMCBIPiA9uKiooyHQEAAEwDPCdAMCg+sC2u2Q8AACSeEyA4FB/Y1vHjx01HAAAA0wDPCRAMig8AAACAGY/iA9tKSUkxHQEAAEwDPCdAMCg+sC3LskxHAAAA0wDPCRAMig9sq6ury3QEAAAwDfCcAMGg+AAAAACY8Sg+sK28vDzTEQAAwDTAcwIEg+ID2zp58qTpCAAAYBrgOQGCQfGBbQ0PD5uOAAAApgGeEyAYFB/YlsvlMh0BAABMAzwnQDAoPrCt9PR00xEAAMA0wHMCBIPiA9uqr683HQEAAEwDPCdAMCg+AAAAAGY8ig9sKzk52XQEAAAwDfCcAMGg+MC2QkI4fAEAAM8JEByOEthWR0eH6QgAAGAa4DkBgkHxAQAAADDjUXxgWzk5OaYjAACAaYDnBAgGxQe2xWltAAAg8ZwAwaH4wLYGBwdNRwAAANMAzwkQDIoPbMvpdJqOAAAApgGeEyAYFB/YVlZWlukIAABgGuA5AYIRZjoAcKlqa2tVXFxsOgYAADPe8PCwJKmystJwkguNjIyooqJCy5Ytk8vlMh3Hr66uznQEvA3FBwAAABd1/kn8nXfeaTaIDbndbtMR8DeXVHwefvhhffe731VHR4dKSkq0Z88eXXPNNRNuOzY2pp07d+qJJ55QW1ubCgoK9MADD2j16tX+bXbs2KFvfOMbAfcrKChQTU3NpcTDLJGYmGg6AgAAs8INN9wgScrPz1dkZKThNIHq6uq0ceNG7d27V/n5+abjBHC73VxqexqZcvF58skntWXLFj3yyCNaunSpdu/erVWrVqm2tlbJyckXbL99+3b96Ec/0qOPPqrCwkI9++yzuummm3TkyBF98IMf9G/3gQ98QL/73e/eDBbGyShcXEREhOkIAADMCnPnztW6detMx7io/Px8lZSUmI6BaWzKFzfYtWuXNmzYoLKyMhUXF+uRRx5RVFSU9u3bN+H2+/fv11133aXS0lJlZ2dr06ZNKi0t1YMPPhiwXVhYmFJTU/1f/Gs+3smpU6dMRwAAAIBNTKn4jI6O6tixY1q5cuWbOwgJ0cqVK1VRUTHhfTwezwVvNIuMjNQLL7wQcFt9fb3mz5+v7Oxs3XzzzWppaXnHPP39/err6/N/eTyeqTwcAAAAALPElF5P1tPTI6/Xq5SUlIDbU1JSJn0/zqpVq7Rr1y5df/31ysnJUXl5uQ4cOCCv1+vfZunSpXr88cdVUFCg9vZ2feMb39Df/d3f6S9/+YtiYmImzbNo0SINDQ35vy8rK9Mdd9yhefPm6fjx4/5slmWpq6tLkpSXl6eTJ09qeHhYLpdL6enpqq+vlyQlJycrJCTE/+m/OTk56ujo0ODgoJxOp7KyslRbWyvpjfeXRERE+M86LFy4UN3d3RoYGFB4eLhyc3NVXV0tSUpISFBkZKTa2tokvXHJxd7eXvX19Sk0NFQFBQWqrq6WZVmKi4tTTEyMWltbJUkZGRnq6+vT2bNn5XA4VFRUpNraWnm9XsXGxio+Pl7Nzc2SpAULFmhoaEi9vb2SpOLiYtXV1Wl8fFwxMTFKTEzUiRMnJEnz58+Xx+PR6dOnJUmFhYVq
bGzU6OiooqOjlZKSosbGRknSvHnzND4+ru7ubklvnEpuaWnRyMiIIiMjlZaWpoaGBv96S1JnZ6ckKTc3V21tbf71zsjI8L9BMikpSWFhYWpvb5ckZWdnq7OzU4ODg4qIiFB2drb/uJo7d66cTmfAertcLlVVVSksLEz5+fmqqqryr3dUVJROnjwpScrMzNSZM2cmXe/Y2Fh/0U5PT1d/f/+k652QkKCmpiZJUlpamoaHh/3rXVRUpIaGBo2NjcntdispKSlgvUdHR9XT0yPpjfewNTU1yePxKDo6Wqmpqf5jNjU1VT6fL+CYbW1t9a/3ggULAo5Zh8PhX++cnBy1t7draGhITqdTmZmZF13vrq4uDQwMTLjeLpdrwmP27esdHx8vt9sdcMyeO3dO586dU0hIiAoLC1VTUyOfz6c5c+Zozpw5Aes9MDCgM2fOXHDMTrTeIyMjEx6zbrdbycnJFz1mm5ub5fF4FBUVxYyYITPC5/NJkhobG5WamnrBjOjp6VF/fz8zghnBjJjhM+L87/F4PGpubg76eQQzYubMiPNr+E4clmVZQW2pN15alJaWpiNHjmjZsmX+27du3arDhw/r6NGjF9ynu7tbGzZs0C9/+Us5HA7l5ORo5cqV2rdvn//SiG939uxZZWZmateuXfo//+f/XPDz8fFxHT58WNnZ2QoJefOkldPp5AOsZpGWlhZlZGSYjgHAkMrKSq1YsUKHDh3idf3ALMYsgNfrVWVlpZYvX37R6wRM6aVuiYmJCg0N9bfC8zo7O5WamjrhfZKSkvTMM89ocHBQzc3NqqmpkdvtVnZ29qS/Jy4uTvn5+f72P5mYmBjFxsb6vyg9s8vAwIDpCAAAALCJKRWfiIgIXXXVVSovL/ff5vP5VF5eHnAGaCIul0tpaWkaHx/X008/rTVr1ky67cDAgI4fP6558+ZNJR5mmfDwcNMRAAAAYBNTvqrbli1b9Oijj+qJJ55QdXW1Nm3apMHBQZWVlUmS1q1bp23btvm3P3r0qA4cOKDGxkb94Q9/0OrVq+Xz+bR161b/Nl/96ld1+PBhNTU16ciRI7rpppsUGhqqf/iHf7gMDxEzVW5urukIAAAAsIkpf1jO2rVr1d3drXvuuUcdHR1avHixDh486H8zWktLS8D7bkZGRrR9+3Y1NjbK7XartLRU+/fvV1xcnH+bkydP6h/+4R90+vRpJSUl6cMf/rD+9Kc/KSkp6d0/QsxY1dXVKi4uNh0DAAAANnBJnxK6efNmbd68ecKfPf/88wHfL1++3H/Vhsn89Kc/vZQYAAAAABCUKb/UDZguEhISTEcAAACATVB8YFuRkZGmIwAAAMAmKD6wrfMfigUAAAC8E4oPAAAAgBmP4gPbysrKMh0BAAAANkHxgW319vaajgAAAACboPjAtvr6+kxHAAAAgE1QfGBboaGhpiMAAADAJig+sK2CggLTEQAAAGATFB/YVnV1tekIAAAAsAmKD2zLsizTEQAAAGATFB/YVlxcnOkIAAAAsAmKD2wrJibGdAQAAADYBMUHttXa2mo6AgAAAGyC4gMAAABgxqP4wLYyMjJMRwAAAIBNUHxgW319faYjAAAAwCYoPrCts2fPmo4AAAAAm6D4wLYcDofpCAAAALAJig9sq6ioyHQEAAAA2ATFB7ZVW1trOgIAAABsguID2/J6vaYjAAAAwCYoPrCt2NhY0xEAAABgExQf2FZ8fLzpCAAAALAJig9sq7m52XQEAAAA2ATFBwAAAMCMR/GBbS1YsMB0BAAAANgExQe2NTQ0ZDoCAAAAbILiA9vq7e01HQEAAAA2QfEBAAAAMONRfGBbxcXFpiMAAADAJig+sK26ujrTEQAAAGATFB/Y1vj4uOkIAAAAsAmKD2wrJibGdAQAAADYBMUHtpWYmGg6AgAAAGyC4gPbOnHihOkIAAAAsAmKDwAAAIAZj+ID25o/f77pCAAAALAJig9sy+PxmI4AAAAAm6D4wLZOnz5t
OgIAAABsguIDAAAAYMaj+MC2CgsLTUcAAACATVB8YFuNjY2mIwAAAMAmKD6wrdHRUdMRAAAAYBMUH9hWdHS06QgAAACwCYoPbCslJcV0BAAAANgExQe2xXt8AAAAECyKDwAAAIAZ75KKz8MPP6ysrCy5XC4tXbpUL7744qTbjo2N6b777lNOTo5cLpdKSkp08ODBSbf/13/9VzkcDt15552XEg2zyLx580xHAAAAgE1Mufg8+eST2rJli+699169/PLLKikp0apVq9TV1TXh9tu3b9fevXu1Z88eVVVV6bbbbtNNN92kV1555YJtX3rpJe3du1dXXnnl1B8JZp3x8XHTEQAAAGATUy4+u3bt0oYNG1RWVqbi4mI98sgjioqK0r59+ybcfv/+/brrrrtUWlqq7Oxsbdq0SaWlpXrwwQcDthsYGNDNN9+sRx99VPHx8Zf2aDCrdHd3m44AAAAAm5hS8RkdHdWxY8e0cuXKN3cQEqKVK1eqoqJiwvt4PB65XK6A2yIjI/XCCy8E3Hb77bfrhhtuCNj3O+nv71dfX5//y+PxTOHRAAAAAJgtwqaycU9Pj7xe7wWXEU5JSVFNTc2E91m1apV27dql66+/Xjk5OSovL9eBAwfk9Xr92/z0pz/Vyy+/rJdeemlK4RctWqShoSH/92VlZbrjjjs0b948HT9+3J/Nsiz/S/Hy8vJ08uRJDQ8Py+VyKT09XfX19ZKk5ORkhYSEqKOjQ5KUk5Ojjo4ODQ4Oyul0KisrS7W1tZKkxMRERURE6NSpU5KkhQsXqru7WwMDAwoPD1dubq6qq6slSQkJCYqMjFRbW5skKSsrS729verr61NoaKgKCgpUXV0ty7IUFxenmJgYtba2SpIyMjLU19ens2fPyuFwqKioSLW1tfJ6vYqNjVV8fLyam5slSQsWLNDQ0JB6e3slScXFxaqrq9P4+LhiYmKUmJioEydOSJLmz58vj8ej06dPS5IKCwvV2Nio0dFRRUdHKyUlxX/VtHnz5ml8fNx/hiU/P18tLS0aGRlRZGSk0tLS1NDQ4F9vSers7JQk5ebmqq2tzb/eGRkZqqurkyQlJSUpLCxM7e3tkqTs7Gx1dnZqcHBQERERys7O9h9Xc+fOldPpDFjv6OhoVVVVKSwsTPn5+aqqqvKvd1RUlE6ePClJyszM1JkzZyZd79jYWLW0tEiS0tPT1d/fP+l6JyQkqKmpSZKUlpam4eFh/3oXFRWpoaFBY2NjcrvdSkpKCljv0dFR9fT0SJIKCgrU1NQkj8ej6Ohopaam+o/Z1NRU+Xy+gGO2tbXVv94LFiwIOGYdDod/vXNyctTe3q6hoSE5nU5lZmZedL27uro0MDAw4Xq7XK4Jj9m3r3d8fLzcbnfAMXvu3DmdO3dOISEhKiwsVE1NjXw+n+bMmaM5c+YErPfAwIDOnDlzwTE70XqPjIxMeMy63W4lJydf9Jhtbm6Wx+NRVFQUM2KGzAifzyfpjSs8pqamXjAjenp61N/fz4xgRjAjZviMOP97PB6Pmpubg34ewYyYOTPi/Bq+E4dlWVZQW0o6deqU0tLSdOTIES1btsx/+9atW3X48GEdPXr0gvt0d3drw4YN+uUvfymHw6GcnBytXLlS+/bt0/DwsFpbW7VkyRI999xz/vf2/K//9b+0ePFi7d69e8Ic4+PjOnz4sLKzsxUS8uZJK6fTKafTGezDgc01NjYqOzvbdAwAhlRWVmrFihU6dOiQSkpKTMcBYAizAF6vV5WVlVq+fLnCwiY/rzOll7olJiYqNDTU3wrP6+zsVGpq6oT3SUpK0jPPPKPBwUE1NzerpqZGbrfb/4T12LFj6urq0oc+9CGFhYUpLCxMhw8f1r/9278pLCws4MzQ28XExCg2Ntb/RemZXUZGRkxHAAAAgE1MqfhEREToqquuUnl5uf82n8+n8vLygDNAE3G5XEpLS9P4+LiefvpprVmzRpL0kY98RK+//rpeffVV/9eSJUt0
880369VXX1VoaOglPCzMBpGRkaYjAAAAwCam9B4fSdqyZYvWr1+vJUuW6JprrtHu3bs1ODiosrIySdK6deuUlpamnTt3SpKOHj2qtrY2LV68WG1tbdqxY4d8Pp+2bt0q6Y2zNosWLQr4HdHR0Zo7d+4FtwNvlZaWZjoCAAAAbGLKxWft2rXq7u7WPffco46ODi1evFgHDx70vxmtpaUl4H03IyMj2r59uxobG+V2u1VaWqr9+/crLi7usj0IzE4NDQ0qLi42HQMAAAA2MOXiI0mbN2/W5s2bJ/zZ888/H/D98uXL/VdtCNbb9wEAAAAA78aUP8AUmC7efll1AAAAYDIUHwAAAAAzHsUHtvX2y6oDAAAAk6H4AAAAAJjxKD6wrdzcXNMRAAAAYBMUH9hWW1ub6QgAAACwCYoPbGt4eNh0BAAAANgExQe25XK5TEcAAACATVB8YFsZGRmmIwAAAMAmKD6wrbq6OtMRAAAAYBMUHwAAAAAzHsUHtpWUlGQ6AgAAAGyC4gPbCgsLMx0BAAAANkHxgW21t7ebjgAAAACboPgAAAAAmPEoPrCt7Oxs0xEAAABgExQf2FZnZ6fpCAAAALAJig9sa3Bw0HQEAAAA2ASXxYJtRUREmI4AYIqGhoZUX19/WfZ1/kOML/eHGefl5SkqKuqy7hMAYB7FB7bFe3wA+6mvr9eKFSsu6z43btx4Wfd36NAhlZSUXNZ9AgDMo/jAtmpqalRcXGw6BoApyMvL06FDhy7LvkZGRlRRUaFly5bJ5XJdln1Kb2QEAMw8FB8AwPsmKirqsp5NiYmJ4R9AAABB4eIGsK25c+eajgDAMOYAACBYFB/YltPpNB0BgGHMAQBAsCg+sK1Tp06ZjgDAMOYAACBYFB8AAAAAMx7FB7a1cOFC0xEAGMYcAAAEi+ID2+rp6TEdAYBhzAEAQLAoPrCt/v5+0xEAGMYcAAAEi+ID2woL42OogNmOOQAACBbFB7aVn59vOgIAw5gDAIBgUXxgW1VVVaYjADCMOQAACBbFBwAAAMCMR/GBbSUkJJiOAMAw5gAAIFgUH9hWVFSU6QgADGMOAACCRfGBbZ08edJ0BACGMQcAAMGi+AAAAACY8Sg+sK3MzEzTEQAYxhwAAASL4gPbOnPmjOkIAAxjDgAAgkXxgW319fWZjgDAMOYAACBYFB/YVmhoqOkIAAxjDgAAgkXxgW0VFBSYjgDAMOYAACBYFB/YVnV1tekIAAxjDgAAgkXxgW1ZlmU6AgDDmAMAgGBRfGBbcXFxpiMAMIw5AAAIFsUHthUbG2s6AgDDmAMAgGBRfGBbLS0tpiMAMIw5AAAIFsUHAAAAwIx3ScXn4YcfVlZWllwul5YuXaoXX3xx0m3HxsZ03333KScnRy6XSyUlJTp48GDANt///vd15ZVXKjY2VrGxsVq2bJl++9vfXko0zCLp6emmIwAwjDkAAAjWlIvPk08+qS1btujee+/Vyy+/rJKSEq1atUpdXV0Tbr99+3bt3btXe/bsUVVVlW677TbddNNNeuWVV/zbLFiwQP/6r/+qY8eO6c9//rP+/u//XmvWrNFf//rXS39kmPH6+/tNRwBgGHMAABCsKRefXbt2acOGDSorK1NxcbEeeeQRRUVFad++fRNuv3//ft11110qLS1Vdna2Nm3apNLSUj344IP+bW688UaVlpYqLy9P+fn5uv/+++V2u/WnP/3poln6+/vV19fn//J4PFN9OLCxs2fPmo4AwDDmAAAgWGFT2Xh0dFTHjh3Ttm3b/LeFhIRo5cqVqqiomPA+Ho9HLpcr4LbIyEi98MILE27v9Xr11FNPaXBwUMuWLbtonkWLFmloaMj/fVlZme644w7NmzdPx48flySlpKTIsiz/Gam8vDydPHlSw8PDcrlcSk9PV319vSQpOTlZISEh6ujokCTl5OSoo6NDg4ODcjqdysrKUm1trSQpMTFREREROnXqlCRp4cKF6u7u1sDAgMLDw5Wb
m+v/YL2EhARFRkaqra1NkpSVlaXe3l719fUpNDRUBQUFqq6ulmVZiouLU0xMjFpbWyVJGRkZ6uvr09mzZ+VwOFRUVKTa2lp5vV7FxsYqPj5ezc3Nkt44czY0NKTe3l5JUnFxserq6jQ+Pq6YmBglJibqxIkTkqT58+fL4/Ho9OnTkqTCwkI1NjZqdHRU0dHRSklJUWNjoyRp3rx5Gh8fV3d3tyQpPz9fLS0tGhkZUWRkpNLS0tTQ0OBfb0nq7OyUJOXm5qqtrc2/3hkZGaqrq5MkJSUlKSwsTO3t7ZKk7OxsdXZ2anBwUBEREcrOzlZNTY0kae7cuXI6nQHrPTIyoqqqKoWFhSk/P19VVVX+9Y6KitLJkyclSZmZmTpz5syk6x0bG+t/g3R6err6+/snXe+EhAQ1NTVJktLS0jQ8POxf76KiIjU0NGhsbExut1tJSUkB6z06Oqqenh5Jb3zafFNTkzwej6Kjo5Wamuo/ZlNTU+Xz+QKO2dbWVv96L1iwIOCYdTgc/vXOyclRe3u7hoaG5HQ6lZmZedH17urq0sDAwITr7XK5Jjxm377e8fHxcrvdAcfsuXPndO7cOYWEhKiwsFA1NTXy+XyaM2eO5syZE7DeAwMDOnPmzAXH7ETrPTIyMuEx63a7lZycfNFjtrm5WR6PR1FRUcyIGTQj+vv7VVVVNeGM6OnpUX9/PzOCGcGMmOEz4vzv8Xg8am5uDvp5BDNi5syI82v4ThzWFD797dSpU0pLS9ORI0cCSsnWrVt1+PBhHT169IL7fP7zn1dlZaWeeeYZ5eTkqLy8XGvWrJHX6w04Q/P6669r2bJlGhkZkdvt1o9//GOVlpZOmGN8fFyHDx9Wdna2QkLePGnldDrldDqDfTgAAACwucrKSq1YsUKHDh1SSUmJ6TgwwOv1qrKyUsuXL1dY2OTndd7zq7o99NBDysvLU2FhoSIiIrR582aVlZUFFBbpjeb66quv6ujRo9q0aZPWr1/vb4KTiYmJ8V8QITY2ltIzy5z/VzMAsxdzAAAQrCkVn8TERIWGhvpPh53X2dmp1NTUCe+TlJSkZ555RoODg2publZNTY3cbreys7MDtouIiFBubq6uuuoq7dy5UyUlJXrooYem+HAwm3i9XtMRABjGHAAABGtKxSciIkJXXXWVysvL/bf5fD6Vl5e/4/txXC6X0tLSND4+rqefflpr1qy56PY+n4+LFeCi+MR2AMwBAECwpnRxA0nasmWL1q9fryVLluiaa67R7t27NTg4qLKyMknSunXrlJaWpp07d0qSjh49qra2Ni1evFhtbW3asWOHfD6ftm7d6t/ntm3b9PGPf1wZGRnq7+/Xj3/8Yz3//PN69tlnL9PDxEyUkJBgOgIAw5gDAIBgTbn4rF27Vt3d3brnnnvU0dGhxYsX6+DBg/6rcLS0tAS8f2dkZETbt29XY2Oj3G63SktLtX//fsXFxfm36erq0rp169Te3q45c+boyiuv1LPPPquPfvSj7/4RYsZqampScXGx6RgADGIOAACCNeXiI0mbN2/W5s2bJ/zZ888/H/D98uXL3/EiBT/84Q8vJQYAAAAABOU9v6ob8F5JS0szHQGAYcwBAECwKD6wreHhYdMRABjGHAAABIviA9sK9lN6AcxczAEAQLAoPgAAAABmPIoPbKuoqMh0BACGMQcAAMGi+MC2GhoaTEcAYBhzAAAQLIoPbGtsbMx0BACGMQcAAMGi+MC23G636QgADGMOAACCRfGBbSUlJZmOAMAw5gAAIFgUH9jWiRMnTEcAYBhzAAAQLIoPAAAAgBmP4gPbmj9/vukIAAxjDgAAgkXxgW2Njo6ajgDAMOYAACBYFB/YVk9Pj+kIAAxjDgAAgkXxAQAAADDjUXxgWwUFBaYjADCMOQAACBbFB7bV1NRkOgIAw5gDAIBgUXxgWx6Px3QEAIYxBwAAwaL4wLaio6NNRwBgGHMAABAsig9sKzU11XQEAIYxBwAAwaL4
wLaOHz9uOgIAw5gDAIBgUXwAAAAAzHhhpgMAl4qXuABgDgD2MzQ0pPr6+su2v7q6uoD/Xg55eXmKioq6bPvD9EDxgW35fD7TEQAYxhwA7Ke+vl4rVqy47PvduHHjZdvXoUOHVFJSctn2h+mB4gPb6urqUmJioukYAAxiDgD2k5eXp0OHDl22/Y2MjKiiokLLli2Ty+W6LPvMy8u7LPvB9ELxAQAAwPsmKirqsp9NiYmJUXFx8WXdJ2YeLm4A2+JfYwAwBwBIzAIEh+ID22ptbTUdAYBhzAEAErMAwaH4wLZGRkZMRwBgGHMAgMQsQHAoPrCtyMhI0xEAGMYcACAxCxAcig9sa8GCBaYjADCMOQBAYhYgOBQf2Nbl/PAzAPbEHAAgMQsQHIoPAAAAgBmP4gPbSk5ONh0BgGHMAQASswDBofjAthwOh+kIAAxjDgCQmAUIDsUHttXZ2Wk6AgDDmAMAJGYBgkPxAQAAADDjUXxgWzk5OaYjADCMOQBAYhYgOBQf2FZ7e7vpCAAMYw4AkJgFCA7FB7bk8Xi0Z88eeTwe01EAGMIcACAxCxA8ig9syePx6LHHHmPIAbMYcwCAxCxA8Cg+AAAAAGY8ig8AAACAGS/MdIBLYVmWJMnr9RpOAlN8Pp+ioqLk8/k4DoBZijkAQGIW4M1OcL4jTMZhvdMW09DIyIj++Mc/mo4BAAAAYJq47rrr5HK5Jv25LYuPz+fT6OioQkND5XA4TMcBAAAAYIhlWfJ6vYqIiFBIyOTv5LFl8QEAAACAqeDiBgAAAABmPIoPAAAAgBmP4gMAAABgxqP4AAAAYFp7/PHHFRcX5/9+x44dWrx48UXv09TUJIfDoVdfffU9zQb7oPjgknV0dOiOO+5Qdna2nE6n0tPTdeONN6q8vNx0tHf09gEK4P3R3d2tTZs2KSMjQ06nU6mpqVq1atX78hEFWVlZ2r1793v+ewBM7JZbbpHD4ZDD4VBERIRyc3N13333aXx8fMr7+upXvxrwfOOWW27RJz/5yYBt0tPT1d7erkWLFr3b6JghbPkBpjCvqalJ1113neLi4vTd735XV1xxhcbGxvTss8/q9ttvV01NjemI7wuv1yuHw3HRSycCeNOnPvUpjY6O6oknnlB2drY6OztVXl6u06dPv2e/c3R0VBEREe/Z/gEEb/Xq1Xrsscfk8Xj0m9/8RrfffrvCw8O1bdu2Ke3H7XbL7XZfdJvQ0FClpqa+m7iYaSzgEnz84x+30tLSrIGBgQt+dubMGcuyLKu5udn63//7f1vR0dFWTEyM9ZnPfMbq6Ojwb3fvvfdaJSUl1g9/+EMrPT3dio6OtjZt2mSNj49bDzzwgJWSkmIlJSVZ3/rWtwL2L8n6j//4D2v16tWWy+WyFi5caD311FP+nx86dMiS5M9hWZb1yiuvWJKsEydO+H/+1q97773XsizLGhkZsb7yla9Y8+fPt6KioqxrrrnGOnTokH8/jz32mDVnzhzrv/7rv6yioiIrNDTUOnHixLteT2A2OHPmjCXJev755yfd5p3+vi3Lsl577TVrxYoVlsvlshISEqwNGzZY/f39/p+vX7/eWrNmjfWtb33LmjdvnpWVlWUtX778gr97y7KspqYm6xOf+IQVFxdnRUVFWcXFxdavf/3r92YBgFnu/N/mW330ox+1/r//7/+zent7rS984QtWXFycFRkZaa1evdqqq6vzb3f+f3/PO/8c4vz//fa/70OHDlknTpywJFmvvPKK/35/+ctfrBtuuMGKiYmx3G639eEPf9hqaGiwLOuN5w9XX321FRUVZc2ZM8e69tprraampvdqOWAA/0yNKevt7dXBgwd1++23Kzo6+oKfx8XFyefzac2aNert7dXhw4f13HPPqbGxUWvXrg3Y9vjx4/rtb3+rgwcP6ic/+Yl++MMf6oYbbtDJkyd1+PBhPfDAA9q+fbuOHj0acL+7775bn/rUp1RZWambb75Zn/vc51RdXR1U/muv
vVa7d+9WbGys2tvb1d7erq9+9auSpM2bN6uiokI//elP9dprr+kzn/mMVq9erfr6ev/9h4aG9MADD+j//t//q7/+9a9KTk6e6hICs9L5f6F95pln5PF4Jt3uYn/fg4ODWrVqleLj4/XSSy/pqaee0u9+9ztt3rw5YB/l5eWqra3Vc889p1/96lc6cOCAFixYoPvuu8//dy9Jt99+uzwej37/+9/r9ddf1wMPPPCO/4oM4PKJjIzU6OiobrnlFv35z3/WL37xC1VUVMiyLJWWlmpsbOwd9/HVr35Vn/3sZ7V69Wr/3/e11157wXZtbW26/vrr5XQ69T//8z86duyYbr31Vo2Pj2t8fFyf/OQntXz5cr322muqqKjQl770JTkcjvfiYcMU080L9nP06FFLknXgwIFJt/nv//5vKzQ01GppafHf9te//tWSZL344ouWZb3xLzRRUVFWX1+ff5tVq1ZZWVlZltfr9d9WUFBg7dy50/+9JOu2224L+H1Lly61Nm3aZFnWO5/xsawL/+XIst44QxUaGmq1tbUF3P6Rj3zE2rZtm/9+kqxXX3110scOYHI/+9nPrPj4eMvlclnXXnuttW3bNquystL/83f6+/7BD35gxcfHB5xt/vWvf22FhIT4zyivX7/eSklJsTweT8B+MjMzre9973sBt11xxRXWjh07LudDBDCJt57x8fl81nPPPWc5nU7rk5/8pCXJ+uMf/+jftqenx4qMjLT+8z//07Ksi5/xefu+z3v7GZ9t27ZZCxcutEZHRy/Idvr06Xc8Iw3744wPpsyyrHfcprq6Wunp6UpPT/ffVlxcrLi4uIAzM1lZWYqJifF/n5KSouLi4oD3zKSkpKirqytg/8uWLbvg+2DP+Ezm9ddfl9frVX5+vv9fpt1utw4fPqzjx4/7t4uIiNCVV175rn4XMFt96lOf0qlTp/SLX/xCq1ev1vPPP68PfehDevzxx/3bXOzvu7q6WiUlJQFnm6+77jr5fD7V1tb6b7viiiuCel/Pl7/8ZX3rW9/Sddddp3vvvVevvfbau3yEAC7mV7/6ldxut1wulz7+8Y9r7dq1uuWWWxQWFqalS5f6t5s7d64KCgre9f+2v9Wrr76qv/u7v1N4ePgFP0tISNAtt9yiVatW6cYbb9RDDz3kPzOMmYPigynLy8uTw+G4LBcwePvwcTgcE97m8/mC3uf50vTWghbMqfKBgQGFhobq2LFjevXVV/1f1dXVeuihh/zbRUZGcuobeBdcLpc++tGP6u6779aRI0d0yy236N57772sv2Oil+FO5Itf/KIaGxv1hS98Qa+//rqWLFmiPXv2XNYsAN60YsUKvfrqq6qvr9fw8LCeeOKJ9+1/UyMjIy/688cee0wVFRW69tpr9eSTTyo/P19/+tOf3pdseH9QfDBlCQkJWrVqlR5++GENDg5e8POzZ8+qqKhIra2tam1t9d9eVVWls2fPqri4+F1nePsg+tOf/qSioiJJUlJSkiQF/EvN26/hHxERIa/XG3DbBz/4QXm9XnV1dSk3Nzfgi6vCAO+d4uLigFlysb/voqIiVVZWBmz/xz/+USEhISooKLjo75no715645K3t912mw4cOKCvfOUrevTRR9/NwwFwEdHR0crNzVVGRobCwt64uHBRUZHGx8cD3s97+vRp1dbWBv2cYbK/77e68sor9Yc//OGi/xj6wQ9+UNu2bdORI0e0aNEi/fjHPw7q98MeKD64JA8//LC8Xq+uueYaPf3006qvr1d1dbX+7d/+TcuWLdPKlSt1xRVX6Oabb9bLL7+sF198UevWrdPy5cu1ZMmSd/37n3rqKe3bt091dXW699579eKLL/rf3Jybm6v09HTt2LFD9fX1+vWvf60HH3ww4P5ZWVkaGBhQeXm5enp6NDQ0pPz8fN18881at26dDhw4oBMnTujFF1/Uzp079etf//pdZwZmu9OnT+vv//7v9aMf/UivvfaaTpw4oaeeekrf
+c53tGbNGv92F/v7vvnmm+VyubR+/Xr95S9/0aFDh3THHXfoC1/4glJSUi76+7OysvT73/9ebW1t6unpkSTdeeedevbZZ3XixAm9/PLLOnTokL9kAXh/5OXlac2aNdqwYYNeeOEFVVZW6h//8R+VlpYWMBsuJisrS6+99ppqa2vV09MzYbnZvHmz+vr69LnPfU5//vOfVV9fr/3796u2tlYnTpzQtm3bVFFRoebmZv33f/+36uvrmQczDMUHlyQ7O1svv/yyVqxYoa985StatGiRPvrRj6q8vFzf//735XA49F//9V+Kj4/X9ddfr5UrVyo7O1tPPvnkZfn93/jGN/TTn/5UV155pf7f//t/+slPfuL/V6Hw8HD95Cc/UU1Nja688ko98MAD+ta3vhVw/2uvvVa33Xab1q5dq6SkJH3nO9+R9MZp7nXr1ukrX/mKCgoK9MlPflIvvfSSMjIyLktuYDZzu91aunSpvve97+n666/XokWLdPfdd2vDhg3693//d/92F/v7joqK0rPPPqve3l5dffXV+vSnP62PfOQjAfefzH333aempibl5OT4zwx7vV7dfvvtKioq0urVq5Wfn6//+I//eG8WAMCkHnvsMV111VX6xCc+oWXLlsmyLP3mN7+Z8P04E9mwYYMKCgq0ZMkSJSUlTfihyHPnztX//M//aGBgQMuXL9dVV12lRx99VOHh4YqKilJNTY0+9alPKT8/X1/60pd0++23a+PGjZf7ocIghxXMO9WBacThcOjnP//5BZ/QDMD++PsGALxXOOMDAAAAYMaj+AAAAACY8XipGwAAAIAZjzM+AAAAAGY8ig8AAACAGY/iAwAAAGDGo/gAAAAAmPEoPgAAAABmPIoPAAAAgBmP4gMAAABgxqP4AAAAAJjx/n8XMkFuwhcgjAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1000x600 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scores grouped by test set: 'Computer' holds score_test_in, the other two\n",
    "# entries hold the score_test_out_* arrays.\n",
    "my_dict = {\n",
    "    'Computer': score_test_in,\n",
    "    'Sports': score_test_out_Sports,\n",
    "    'Politics': score_test_out_Politics,\n",
    "}\n",
    "\n",
    "# One box per group, drawn on an explicit 10x6 inch axes.\n",
    "box_fig, box_ax = plt.subplots(figsize=(10, 6))\n",
    "box_ax.boxplot(my_dict.values(), labels=my_dict.keys());\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Kinfmb_NDlRR",
    "outputId": "a3cfcb09-6e3e-419d-b825-bba19f8f4ba3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.99812\n",
      "0.991432\n"
     ]
    }
   ],
   "source": [
    "# Stack the in-distribution test scores with each out-of-distribution set.\n",
    "score_pred_Sports = np.concatenate([score_test_in, score_test_out_Sports])\n",
    "score_pred_Politics = np.concatenate([score_test_in, score_test_out_Politics])\n",
    "\n",
    "# score_true is shared by both comparisons, so both out-of-distribution sets\n",
    "# must contribute the same number of scores.\n",
    "assert len(score_test_out_Sports) == len(score_test_out_Politics)\n",
    "\n",
    "# Labels: 1 for every score_test_in entry, 0 for every score_test_out_* entry.\n",
    "# The counts follow the actual array lengths instead of a hard-coded 500.\n",
    "score_true = np.concatenate([\n",
    "    np.ones(len(score_test_in)),\n",
    "    np.zeros(len(score_test_out_Sports)),\n",
    "])\n",
    "\n",
    "# ROC AUC against Sports, then against Politics.\n",
    "print(roc_auc_score(score_true, score_pred_Sports))\n",
    "print(roc_auc_score(score_true, score_pred_Politics))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "vgMRPvQ6EuZx",
    "outputId": "ca8099b2-40af-45c8-c948-839091a08dcf"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9952789175629133\n",
      "0.9723320686317982\n"
     ]
    }
   ],
   "source": [
    "# Area under the precision-recall curve, Sports comparison.\n",
    "precision_Sports, recall_Sports, thresholds_Sports = precision_recall_curve(\n",
    "    score_true, score_pred_Sports\n",
    ")\n",
    "auc_precision_recall_Sports = auc(recall_Sports, precision_Sports)\n",
    "\n",
    "# Same computation for the Politics comparison.\n",
    "precision_Politics, recall_Politics, thresholds_Politics = precision_recall_curve(\n",
    "    score_true, score_pred_Politics\n",
    ")\n",
    "auc_precision_recall_Politics = auc(recall_Politics, precision_Politics)\n",
    "\n",
    "# Sports first, then Politics, one value per line.\n",
    "print(auc_precision_recall_Sports, auc_precision_recall_Politics, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "sk0lXKvnEunm",
    "outputId": "d12ad857-08a8-431f-8509-f3326995bd91"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.002\n",
      "0.02\n"
     ]
    }
   ],
   "source": [
    "def compute_fpr90(y_true, y_pred_probs, target_tpr=0.90):\n",
    "    \"\"\"Return the false-positive rate at the first ROC point with TPR >= target_tpr.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    y_true : array-like\n",
    "        Binary ground-truth labels, forwarded to sklearn.metrics.roc_curve.\n",
    "    y_pred_probs : array-like\n",
    "        Scores for the positive class, forwarded to sklearn.metrics.roc_curve.\n",
    "    target_tpr : float, default 0.90\n",
    "        True-positive rate that must be reached, in the interval (0, 1].\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        fpr of the lowest-index ROC point whose tpr is at least target_tpr.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If target_tpr lies outside (0, 1].\n",
    "    \"\"\"\n",
    "    if not 0.0 < target_tpr <= 1.0:\n",
    "        raise ValueError('target_tpr must be in (0, 1]')\n",
    "    fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true, y_pred_probs)\n",
    "    # tpr is non-decreasing, so searchsorted locates the first point that really\n",
    "    # reaches the target. Picking the *nearest* tpr instead can select a point\n",
    "    # below the target and therefore under-report the false-positive rate.\n",
    "    idx = np.searchsorted(tpr, target_tpr, side='left')\n",
    "    fpr90 = fpr[idx]\n",
    "    return fpr90\n",
    "\n",
    "\n",
    "# Report compute_fpr90 for each comparison: Sports first, then Politics.\n",
    "fpr90_score_Sports = compute_fpr90(y_true=score_true, y_pred_probs=score_pred_Sports)\n",
    "print(fpr90_score_Sports)\n",
    "\n",
    "fpr90_score_Politics = compute_fpr90(y_true=score_true, y_pred_probs=score_pred_Politics)\n",
    "print(fpr90_score_Politics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {
    "id": "8qvhx56297Yo"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {
    "id": "gRlaGCFZF_bh"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Jcg0DiW2chd1"
   },
   "source": [
    "# Sports vs. Computer and Politics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {
    "id": "_ZH_Vomn97a_"
   },
   "outputs": [],
   "source": [
    "# Training corpus for this section: the two rec.sport.* newsgroups.\n",
    "# NOTE(review): random_state=None is passed, so no fixed seed is set here.\n",
    "categories_train = ['rec.sport.baseball', 'rec.sport.hockey']\n",
    "newsgroups_train = fetch_20newsgroups(\n",
    "    subset='train',\n",
    "    categories=categories_train,\n",
    "    random_state=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {
    "id": "YhMMU7lX-JcZ"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per training post: the mean of the GloVe vectors of its tokens.\n",
    "# The row count is taken from the dataset instead of a hard-coded 1197, so the\n",
    "# embedding matrix always lines up with newsgroups_train.target.\n",
    "labels_Sports_train = newsgroups_train.target\n",
    "embeddings_Sports_train = np.zeros((len(newsgroups_train.data), 300))\n",
    "\n",
    "for i, line in enumerate(newsgroups_train.data):\n",
    "    embedding = global_vectors.get_vecs_by_tokens(tokenizer(line), lower_case_backup=True)\n",
    "    # Average over dim 0 (one row per token); keepdim=True yields a (1, 300) tensor.\n",
    "    embedding_mean = torch.mean(embedding, 0, True)\n",
    "    embeddings_Sports_train[i, :] = embedding_mean\n",
    "\n",
    "# Every label must have exactly one embedding row.\n",
    "assert embeddings_Sports_train.shape[0] == len(labels_Sports_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "rdqvR7wX-Jec",
    "outputId": "98e19832-a938-46cb-f166-6c7c1a969f51"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1197, 300)\n",
      "(1197,)\n",
      "[0 0 1 ... 1 0 1]\n"
     ]
    }
   ],
   "source": [
    "# Classifier inputs: the pooled embeddings and their newsgroup labels.\n",
    "latent_train, y = embeddings_Sports_train, labels_Sports_train\n",
    "\n",
    "# Shapes first, then the label vector itself, one item per line.\n",
    "print(latent_train.shape, y.shape, y, sep='\\n')\n",
    "\n",
    "# A label-shuffling step (np.random.shuffle(y)) is kept disabled here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {
    "id": "RmtqzWbX-JgS"
   },
   "outputs": [],
   "source": [
    "# Extra-trees forest: 100 trees, at least 10 samples per leaf, sqrt feature subsampling.\n",
    "et = ExtraTreesClassifier(\n",
    "    n_estimators=100,\n",
    "    min_samples_leaf=10,\n",
    "    max_features=\"sqrt\",\n",
    "    bootstrap=True,\n",
    "    class_weight='balanced',\n",
    "    n_jobs=-1,\n",
    ")\n",
    "# A RandomForestClassifier variant (same settings, max_features=None) is kept disabled.\n",
    "\n",
    "# Stratified, shuffled 5-fold split; no seed is set.\n",
    "skf = StratifiedKFold(shuffle=True, n_splits=5)\n",
    "\n",
    "# Out-of-fold class probabilities for every training sample.\n",
    "preds = cross_val_predict(estimator=et, X=latent_train, y=y, cv=skf, method='predict_proba')\n",
    "\n",
    "# The ROC-AUC printout on preds is disabled:\n",
    "# print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 92
    },
    "id": "Qpt8gZnO-lwW",
    "outputId": "1f7ce24e-89f5-42df-de57-8f13267c8054"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-15 {color: black;background-color: white;}#sk-container-id-15 pre{padding: 0;}#sk-container-id-15 div.sk-toggleable {background-color: white;}#sk-container-id-15 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-15 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-15 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-15 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-15 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-15 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-15 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-15 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-15 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-15 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-15 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-15 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-15 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-15 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-15 div.sk-item {position: relative;z-index: 1;}#sk-container-id-15 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-15 div.sk-item::before, #sk-container-id-15 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-15 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-15 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-15 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-15 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-15 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-15 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-15 div.sk-label-container {text-align: center;}#sk-container-id-15 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-15 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-15\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" checked><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ExtraTreesClassifier</label><div class=\"sk-toggleable__content\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
       "                     min_samples_leaf=10, n_jobs=-1)"
      ]
     },
     "execution_count": 305,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit on the full training set; the fitted estimator is the cell's displayed value.\n",
    "et.fit(X=latent_train, y=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "jSqVxT60-pBs",
    "outputId": "67d46f30-3794-446e-faf3-bca9db68735b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1197, 100)\n",
      "[[ 46  36  27 ...  47  66  27]\n",
      " [ 53  14  15 ...  79  36 104]\n",
      " [ 21   6  39 ...  83  77  93]\n",
      " ...\n",
      " [ 53  91 104 ...  46  74  17]\n",
      " [ 94 100  80 ...  45  59  48]\n",
      " [ 45  15  94 ...  32  63  32]]\n",
      "0.9799220040080192\n",
      "1.3360999113583697e-06\n"
     ]
    }
   ],
   "source": [
    "# Leaf index reached in each tree of the forest, one row per training sample.\n",
    "leaves_train = et.apply(latent_train)\n",
    "print(leaves_train.shape)\n",
    "print(leaves_train)\n",
    "\n",
    "# Reference subset: the first 500 training samples (fewer if the set is smaller).\n",
    "n_train_ref = min(500, len(leaves_train))\n",
    "leaves_train_ref = leaves_train[:n_train_ref]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. Broadcasting replaces the 500 x 500 Python loop of `hamming` calls.\n",
    "# NOTE(review): assumes `hamming` was the normalized Hamming distance\n",
    "# (scipy.spatial.distance.hamming) - confirm against the import cell.\n",
    "distances_train = (leaves_train_ref[:, None, :] != leaves_train_ref[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance to the other n - 1 samples (the self-distance on the diagonal is 0).\n",
    "score_train = distances_train.sum(axis=0) / (n_train_ref - 1)\n",
    "\n",
    "print(np.mean(score_train))\n",
    "print(np.cov(score_train))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GAr7zMEM-pgv"
   },
   "source": [
    "## Testing on In-Distribution (ID) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OEMc8k7E-wN2"
   },
   "source": [
    "### Sports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 307,
   "metadata": {
    "id": "9bRlJ139-yvU"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the two sports newsgroups.\n",
    "categories_test = ['rec.sport.baseball', 'rec.sport.hockey']\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 308,
   "metadata": {
    "id": "bfBt8Y7u-yxq"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Sports_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Sports_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Sports_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 309,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "4_RFTdix_NO5",
    "outputId": "bf5c4e91-28d2-4995-aa0d-3715f4a00c16"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 68  91  57 ... 105  61  85]\n",
      " [ 34  21  60 ...  23  98  63]\n",
      " [ 49  67  78 ...  86  98  64]\n",
      " ...\n",
      " [  3  86  85 ...  45  66  83]\n",
      " [ 28 107   7 ...  50  97 100]\n",
      " [ 69  64  49 ...  99  36  86]]\n",
      "0.9772144288577187\n",
      "8.952374892475787e-06\n"
     ]
    }
   ],
   "source": [
    "# In-distribution test embeddings pushed through the fitted forest.\n",
    "latent_test_in = embeddings_Sports_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_in = et.apply(latent_test_in)\n",
    "print(leaves_test_in.shape)\n",
    "print(leaves_test_in)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_in.shape[0])\n",
    "leaves_scored = leaves_test_in[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_in = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_in = distances_test_in.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_in))\n",
    "print(np.cov(score_test_in))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ZQjO8imi_pUz"
   },
   "source": [
    "## Testing on Out-of-Distribution (OOD) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DWrz5jwN_tw4"
   },
   "source": [
    "### Computer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 310,
   "metadata": {
    "id": "ExqGDTzD_tMw"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the five computer newsgroups.\n",
    "categories_test = [\n",
    "    'comp.graphics',\n",
    "    'comp.os.ms-windows.misc',\n",
    "    'comp.sys.ibm.pc.hardware',\n",
    "    'comp.sys.mac.hardware',\n",
    "    'comp.windows.x',\n",
    "]\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 311,
   "metadata": {
    "id": "kVtyIHzq_tPD"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Computer_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Computer_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Computer_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 312,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "_GvxQbQWACcV",
    "outputId": "30556281-8958-42fc-99bd-672811471b1f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[  8  40  72 ... 103  14  55]\n",
      " [ 77 107  49 ... 106  27  78]\n",
      " [102  92  20 ...  87  16  64]\n",
      " ...\n",
      " [ 73  93  97 ...  42  14  63]\n",
      " [ 13  91  31 ...  98  10 101]\n",
      " [  8  74 108 ...  53   5  47]]\n",
      "0.9311421242484978\n",
      "0.000373926318561375\n"
     ]
    }
   ],
   "source": [
    "# Out-of-distribution (computer) test embeddings pushed through the fitted forest.\n",
    "latent_test_out_Computer = embeddings_Computer_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_out_Computer = et.apply(latent_test_out_Computer)\n",
    "print(leaves_test_out_Computer.shape)\n",
    "print(leaves_test_out_Computer)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_out_Computer.shape[0])\n",
    "leaves_scored = leaves_test_out_Computer[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_out_Computer = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_out_Computer = distances_test_out_Computer.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_out_Computer))\n",
    "print(np.cov(score_test_out_Computer))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "eTnpMyfBAbGd"
   },
   "source": [
    "### Politics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 313,
   "metadata": {
    "id": "3UMF_3NGACeJ"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the three politics newsgroups.\n",
    "categories_test = ['talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc']\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 314,
   "metadata": {
    "id": "tvsURtxNAaNs"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Politics_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Politics_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Politics_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 315,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "voDjVWOTAaRn",
    "outputId": "8ba66b8a-c64e-491e-ac8d-b3aca0e2e216"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 85  92 101 ...  92  62  59]\n",
      " [ 73  50  45 ...  14  17 106]\n",
      " [ 78  45  89 ...  92  62  49]\n",
      " ...\n",
      " [ 78  83  19 ...  75  62  44]\n",
      " [ 69  22  19 ...  91   6   5]\n",
      " [ 72  21  49 ...  91  14  87]]\n",
      "0.9388475350701416\n",
      "0.00033973437613660435\n"
     ]
    }
   ],
   "source": [
    "# Out-of-distribution (politics) test embeddings pushed through the fitted forest.\n",
    "latent_test_out_Politics = embeddings_Politics_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_out_Politics = et.apply(latent_test_out_Politics)\n",
    "print(leaves_test_out_Politics.shape)\n",
    "print(leaves_test_out_Politics)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_out_Politics.shape[0])\n",
    "leaves_scored = leaves_test_out_Politics[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_out_Politics = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_out_Politics = distances_test_out_Politics.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_out_Politics))\n",
    "print(np.cov(score_test_out_Politics))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "8WgQZi5RAwJF"
   },
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 316,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "j6VoafMBAxuY",
    "outputId": "9051e709-67c9-470a-fd99-179d0d511c8f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9997139999999999\n",
      "0.987518\n"
     ]
    }
   ],
   "source": [
    "# Ground truth: 500 in-distribution entries (label 1) followed by 500 OOD entries (label 0).\n",
    "score_true = np.repeat([1.0, 0.0], 500)\n",
    "\n",
    "# Put the ID scores in front of each OOD set so they line up with score_true.\n",
    "score_pred_Computer = np.concatenate((score_test_in, score_test_out_Computer))\n",
    "score_pred_Politics = np.concatenate((score_test_in, score_test_out_Politics))\n",
    "\n",
    "# ROC AUC per OOD set, Computer first and Politics second.\n",
    "for stacked_scores in (score_pred_Computer, score_pred_Politics):\n",
    "    print(roc_auc_score(score_true, stacked_scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 317,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "dDDLWq66Axwr",
    "outputId": "2ee65df5-11b1-47b2-b8ac-54cfc9c838c7"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9997075013177623\n",
      "0.9651167017988076\n"
     ]
    }
   ],
   "source": [
    "# Area under the precision-recall curve, ID vs. Computer.\n",
    "precision_Computer, recall_Computer, thresholds_Computer = precision_recall_curve(\n",
    "    score_true, score_pred_Computer\n",
    ")\n",
    "auc_precision_recall_Computer = auc(recall_Computer, precision_Computer)\n",
    "\n",
    "# Area under the precision-recall curve, ID vs. Politics.\n",
    "precision_Politics, recall_Politics, thresholds_Politics = precision_recall_curve(\n",
    "    score_true, score_pred_Politics\n",
    ")\n",
    "auc_precision_recall_Politics = auc(recall_Politics, precision_Politics)\n",
    "\n",
    "for pr_area in (auc_precision_recall_Computer, auc_precision_recall_Politics):\n",
    "    print(pr_area)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "wZzYQ-2MBOFQ",
    "outputId": "9b7a2d67-095c-40a7-ec35-7a6ddf282384"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0\n",
      "0.028\n"
     ]
    }
   ],
   "source": [
    "# compute_fpr90 is defined elsewhere in the notebook -- presumably the false-positive\n",
    "# rate at 90% true-positive rate; confirm against its definition.\n",
    "fpr90_score_Computer = compute_fpr90(score_true, score_pred_Computer)\n",
    "print(fpr90_score_Computer)\n",
    "\n",
    "fpr90_score_Politics = compute_fpr90(score_true, score_pred_Politics)\n",
    "print(fpr90_score_Politics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {
    "id": "cydM8_gqCKvw"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {
    "id": "zguwtf9NF8SO"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "EKyyJqN0cwmF"
   },
   "source": [
    "# Politics vs. Sports and Computer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {
    "id": "aNYIntigCK9A"
   },
   "outputs": [],
   "source": [
    "# Training posts from the three politics newsgroups.\n",
    "categories_train = ['talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc']\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the row order, and\n",
    "# so the rows that later cells take from the front of the training set, changes\n",
    "# between runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps\n",
    "# the order reproducible.\n",
    "newsgroups_train = fetch_20newsgroups(subset='train', categories=categories_train, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 320,
   "metadata": {
    "id": "zCWcr9amCqsi"
   },
   "outputs": [],
   "source": [
    "# Size the matrix from the data instead of a hard-coded 1575 rows, so the cell keeps\n",
    "# working (no IndexError, no silent all-zero rows) if the category list or the\n",
    "# dataset changes. Each row is one 300-d document embedding.\n",
    "embeddings_Politics_train = np.zeros((len(newsgroups_train.data), 300))\n",
    "labels_Politics_train = newsgroups_train.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_train.data):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Politics_train[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "NDUb3MBgCqul",
    "outputId": "51052539-7a1c-4449-fc21-9366717e2410"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1575, 300)\n",
      "(1575,)\n",
      "[2 0 1 ... 0 0 2]\n"
     ]
    }
   ],
   "source": [
    "# Training matrix and labels under the generic names the forest cells use.\n",
    "latent_train, y = embeddings_Politics_train, labels_Politics_train\n",
    "\n",
    "# Quick look at the shapes and the label vector.\n",
    "for summary in (latent_train.shape, y.shape, y):\n",
    "    print(summary)\n",
    "# np.random.shuffle(y)\n",
    "# print(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "metadata": {
    "id": "LRh60y_lCqwr"
   },
   "outputs": [],
   "source": [
    "# Fixed seeds: without random_state the forest and the shuffled fold assignment\n",
    "# differ on every run, so no score derived from them can be reproduced.\n",
    "et = ExtraTreesClassifier(n_estimators=100, min_samples_leaf=10,\n",
    "                          max_features=\"sqrt\", bootstrap=True, class_weight='balanced',\n",
    "                          n_jobs=-1, random_state=42)\n",
    "\n",
    "# et = RandomForestClassifier(n_estimators=100, min_samples_leaf=10,\n",
    "#                           max_features=None, bootstrap=True, class_weight='balanced', n_jobs=-1)\n",
    "\n",
    "# validation instance (stratified 5-fold, shuffled with a fixed seed)\n",
    "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n",
    "\n",
    "# out-of-fold class probabilities; `preds` is only needed by the disabled check below\n",
    "preds = cross_val_predict(et, latent_train, y, cv=skf, method='predict_proba')\n",
    "\n",
    "# evaluating the model\n",
    "#print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 92
    },
    "id": "XHBZVNpVDhBZ",
    "outputId": "bb7be275-ff6f-4a8c-d577-db93d571bef9"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-16 {color: black;background-color: white;}#sk-container-id-16 pre{padding: 0;}#sk-container-id-16 div.sk-toggleable {background-color: white;}#sk-container-id-16 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-16 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-16 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-16 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-16 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-16 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-16 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-16 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-16 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-16 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-16 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-16 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-16 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-16 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-16 div.sk-item {position: relative;z-index: 1;}#sk-container-id-16 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-16 div.sk-item::before, #sk-container-id-16 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-16 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-16 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-16 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-16 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-16 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-16 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-16 div.sk-label-container {text-align: center;}#sk-container-id-16 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-16 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-16\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" checked><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ExtraTreesClassifier</label><div class=\"sk-toggleable__content\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=10, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
       "                     min_samples_leaf=10, n_jobs=-1)"
      ]
     },
     "execution_count": 323,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the forest on the full training matrix; the returned estimator is the cell output.\n",
    "et.fit(X=latent_train, y=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "79qf6-0xDksC",
    "outputId": "2bd2e8d4-d665-45a0-a2dd-86ee47d743c3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1575, 100)\n",
      "[[ 83  77  71 ...  19 138  49]\n",
      " [ 16  90  39 ... 132  75 107]\n",
      " [ 54 149 113 ... 136 126  54]\n",
      " ...\n",
      " [ 51 143  79 ...  98  18  91]\n",
      " [ 15  90  79 ... 103 114  39]\n",
      " [ 68 131  84 ...  35 104 125]]\n",
      "0.9850648496994019\n",
      "2.0249728753781617e-06\n"
     ]
    }
   ],
   "source": [
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_train = et.apply(latent_train)\n",
    "print(leaves_train.shape)\n",
    "print(leaves_train)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_train.shape[0])\n",
    "leaves_scored = leaves_train[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_train = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_train = distances_train.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_train))\n",
    "print(np.cov(score_train))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "xCOq6sGtDlq_"
   },
   "source": [
    "## Testing on In-Distribution (ID) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4oJ0PGFrc-wm"
   },
   "source": [
    "### Politics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {
    "id": "Xlr1iB_RY788"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the three politics newsgroups.\n",
    "categories_test = ['talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc']\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {
    "id": "oszm6P2QY8CZ"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Politics_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Politics_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Politics_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "u2_pbH8zZs9r",
    "outputId": "e185c5ec-cabf-4dc8-ca6b-bed29733acbe"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 87  15 133 ... 132  62  92]\n",
      " [105 111  30 ...  27  20 101]\n",
      " [110 131  48 ...  23  37 131]\n",
      " ...\n",
      " [141 141 113 ...   7 133  96]\n",
      " [ 68  71 133 ...  43  70  31]\n",
      " [ 22  84 110 ...  76  97  33]]\n",
      "0.982467735470945\n",
      "5.856782621190784e-06\n"
     ]
    }
   ],
   "source": [
    "# In-distribution test embeddings pushed through the fitted forest.\n",
    "latent_test_in = embeddings_Politics_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_in = et.apply(latent_test_in)\n",
    "print(leaves_test_in.shape)\n",
    "print(leaves_test_in)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_in.shape[0])\n",
    "leaves_scored = leaves_test_in[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_in = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_in = distances_test_in.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_in))\n",
    "print(np.cov(score_test_in))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "GAGbLPQCY8tW"
   },
   "source": [
    "## Testing on Out-of-Distribution (OOD) Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7q9jbVZKEGdJ"
   },
   "source": [
    "### Sports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {
    "id": "B5xSXHnHDlHD"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the two sports newsgroups.\n",
    "categories_test = ['rec.sport.baseball', 'rec.sport.hockey']\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 329,
   "metadata": {
    "id": "oy-6BrZ1D4sO"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Sports_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Sports_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Sports_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "t-rVMyR4D-8e",
    "outputId": "e7eb2865-3910-4347-e198-aee532b03f8b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 60  85  97 ...  27  96  53]\n",
      " [112 114  31 ...  51 138  51]\n",
      " [ 86  85 137 ...  54 137  87]\n",
      " ...\n",
      " [ 60  37  33 ...  27  75  60]\n",
      " [ 92 108  36 ...  12 138  89]\n",
      " [ 40  91  31 ...   7  51  77]]\n",
      "0.9546064128256533\n",
      "0.00014246523512768247\n"
     ]
    }
   ],
   "source": [
    "# Out-of-distribution (sports) test embeddings pushed through the fitted forest.\n",
    "latent_test_out_Sports = embeddings_Sports_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_out_Sports = et.apply(latent_test_out_Sports)\n",
    "print(leaves_test_out_Sports.shape)\n",
    "print(leaves_test_out_Sports)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_out_Sports.shape[0])\n",
    "leaves_scored = leaves_test_out_Sports[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_out_Sports = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_out_Sports = distances_test_out_Sports.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_out_Sports))\n",
    "print(np.cov(score_test_out_Sports))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fSk9G3rWEJXH"
   },
   "source": [
    "### Computer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {
    "id": "yaA_0cUhEMCd"
   },
   "outputs": [],
   "source": [
    "# Held-out posts from the five computer newsgroups.\n",
    "categories_test = [\n",
    "    'comp.graphics',\n",
    "    'comp.os.ms-windows.misc',\n",
    "    'comp.sys.ibm.pc.hardware',\n",
    "    'comp.sys.mac.hardware',\n",
    "    'comp.windows.x',\n",
    "]\n",
    "\n",
    "# fetch_20newsgroups shuffles by default. With random_state=None the order, and so\n",
    "# the documents that later cells take from the front of the list, changes between\n",
    "# runs (unless NumPy's global RNG is seeded elsewhere). A fixed seed keeps the\n",
    "# evaluation subset reproducible.\n",
    "newsgroups_test = fetch_20newsgroups(subset='test', categories=categories_test, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {
    "id": "6FlJLp0fEMEx"
   },
   "outputs": [],
   "source": [
    "# One 300-d row per document, filled from the first 500 test posts.\n",
    "embeddings_Computer_test = np.zeros((500, 300))\n",
    "# Labels keep the full test-set length; they are not cut to 500 here.\n",
    "labels_Computer_test = newsgroups_test.target\n",
    "\n",
    "for row, document in enumerate(newsgroups_test.data[:500]):\n",
    "    token_vectors = global_vectors.get_vecs_by_tokens(tokenizer(document), lower_case_backup=True)\n",
    "    # Average the token vectors into a single document embedding.\n",
    "    embeddings_Computer_test[row, :] = torch.mean(token_vectors, 0, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "iR06u96LEMIa",
    "outputId": "a6c46e3e-d35f-46b1-a021-41b247bf8f2c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 100)\n",
      "[[ 77 122 101 ... 104  82  19]\n",
      " [ 87 143  97 ... 142 141  92]\n",
      " [109 111  97 ... 104  81  62]\n",
      " ...\n",
      " [ 87  52  96 ...  27  76  70]\n",
      " [ 22  52  65 ... 129  51  91]\n",
      " [ 43  91  30 ...  46  84 148]]\n",
      "0.9525584769539093\n",
      "0.00013315041526221026\n"
     ]
    }
   ],
   "source": [
    "# Out-of-distribution (computer) test embeddings pushed through the fitted forest.\n",
    "latent_test_out_Computer = embeddings_Computer_test\n",
    "\n",
    "# Leaf index reached in each tree: one row per sample, one column per tree.\n",
    "leaves_test_out_Computer = et.apply(latent_test_out_Computer)\n",
    "print(leaves_test_out_Computer.shape)\n",
    "print(leaves_test_out_Computer)\n",
    "\n",
    "# Score at most the first 500 samples; min() keeps the cell valid for smaller sets.\n",
    "n_scored = min(500, leaves_test_out_Computer.shape[0])\n",
    "leaves_scored = leaves_test_out_Computer[:n_scored]\n",
    "\n",
    "# Pairwise Hamming distance = fraction of trees in which two samples fall into\n",
    "# different leaves. One broadcast comparison replaces the 500 x 500 Python loop of\n",
    "# hamming() calls (temporary boolean array: n_scored x n_scored x n_trees).\n",
    "distances_test_out_Computer = (leaves_scored[:, None, :] != leaves_scored[None, :, :]).mean(axis=2)\n",
    "\n",
    "# Mean distance from each sample to the others; the zero self-distance is left out\n",
    "# of the denominator, which is why it is n_scored - 1 (499 for 500 samples).\n",
    "score_test_out_Computer = distances_test_out_Computer.sum(axis=0) / max(n_scored - 1, 1)\n",
    "\n",
    "print(np.mean(score_test_out_Computer))\n",
    "print(np.cov(score_test_out_Computer))  # on a 1-D array np.cov is the sample variance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "LGgLGDx3Ef-R"
   },
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "cbiIJSZhEhaq",
    "outputId": "7ec30d20-806f-4788-e211-cb77888ff73a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.998948\n",
      "0.99798\n"
     ]
    }
   ],
   "source": [
    "# Ground truth: 500 in-distribution entries (label 1) followed by 500 OOD entries (label 0).\n",
    "score_true = np.repeat([1.0, 0.0], 500)\n",
    "\n",
    "# Put the ID scores in front of each OOD set so they line up with score_true.\n",
    "score_pred_Computer = np.concatenate((score_test_in, score_test_out_Computer))\n",
    "score_pred_Sports = np.concatenate((score_test_in, score_test_out_Sports))\n",
    "\n",
    "# ROC AUC per OOD set, Computer first and Sports second.\n",
    "for stacked_scores in (score_pred_Computer, score_pred_Sports):\n",
    "    print(roc_auc_score(score_true, stacked_scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 335,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "IzUM2ADBEhcu",
    "outputId": "6c8cf7f2-52b4-4b93-f47f-97a52d2f731f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9988981487387412\n",
      "0.9977265629322636\n"
     ]
    }
   ],
   "source": [
    "# Area under the precision-recall curve, ID vs. Computer.\n",
    "precision_Computer, recall_Computer, thresholds_Computer = precision_recall_curve(\n",
    "    score_true, score_pred_Computer\n",
    ")\n",
    "auc_precision_recall_Computer = auc(recall_Computer, precision_Computer)\n",
    "\n",
    "# Area under the precision-recall curve, ID vs. Sports.\n",
    "precision_Sports, recall_Sports, thresholds_Sports = precision_recall_curve(\n",
    "    score_true, score_pred_Sports\n",
    ")\n",
    "auc_precision_recall_Sports = auc(recall_Sports, precision_Sports)\n",
    "\n",
    "for pr_area in (auc_precision_recall_Computer, auc_precision_recall_Sports):\n",
    "    print(pr_area)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 336,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "5P_nA8JmEheU",
    "outputId": "16156a15-01b6-4cd0-9fd6-9a7ce07d9a4c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.004\n",
      "0.006\n"
     ]
    }
   ],
   "source": [
    "# compute_fpr90 is defined elsewhere in the notebook -- presumably the false-positive\n",
    "# rate at 90% true-positive rate; confirm against its definition.\n",
    "fpr90_score_Computer = compute_fpr90(score_true, score_pred_Computer)\n",
    "print(fpr90_score_Computer)\n",
    "\n",
    "fpr90_score_Sports = compute_fpr90(score_true, score_pred_Sports)\n",
    "print(fpr90_score_Sports)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
