{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1317,
     "status": "ok",
     "timestamp": 1693861430776,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "Au3Fcvf9ONNZ",
    "outputId": "fe5369f0-dfb8-48a0-e88d-85d938bcf0f5"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
     ]
    }
   ],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6106,
     "status": "ok",
     "timestamp": 1693848580150,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "DmeCfBPTjxgJ",
    "outputId": "3ebfbbd2-2b5d-4df4-948b-e303c8af289d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting PIMS\n",
      "  Downloading PIMS-0.6.1.tar.gz (86 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from PIMS) (2.31.1)\n",
      "Requirement already satisfied: numpy>=1.19 in /usr/local/lib/python3.10/dist-packages (from PIMS) (1.23.5)\n",
      "Collecting slicerator>=0.9.8 (from PIMS)\n",
      "  Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n",
      "Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.10/dist-packages (from imageio->PIMS) (9.4.0)\n",
      "Building wheels for collected packages: PIMS\n",
      "  Building wheel for PIMS (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for PIMS: filename=PIMS-0.6.1-py3-none-any.whl size=82615 sha256=cfe0be47fd499aa792b0ec4ac47308518590ab278d27724a528635222e7bcdd4\n",
      "  Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n",
      "Successfully built PIMS\n",
      "Installing collected packages: slicerator, PIMS\n",
      "Successfully installed PIMS-0.6.1 slicerator-1.1.0\n"
     ]
    }
   ],
   "source": [
    "pip install PIMS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5282,
     "status": "ok",
     "timestamp": 1693848592816,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "vOZ-qtazj0pn",
    "outputId": "424fdddc-14ca-4b40-a47f-d4acbe45615b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting fastcluster\n",
      "  Downloading fastcluster-1.2.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
      "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/194.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r",
      "\u001b[2K     \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.9/194.0 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.0/194.0 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.9 in /usr/local/lib/python3.10/dist-packages (from fastcluster) (1.23.5)\n",
      "Installing collected packages: fastcluster\n",
      "Successfully installed fastcluster-1.2.6\n"
     ]
    }
   ],
   "source": [
    "pip install fastcluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "UFN8FbHYOWnl"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "from torchvision import datasets\n",
    "import torchvision.transforms as transforms\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import torchvision\n",
    "import torch\n",
    "from torchvision import transforms\n",
    "\n",
    "plt.rcParams['axes.facecolor'] = 'white'\n",
    "plt.rcParams['figure.figsize'] = 9, 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KH8zgcDRj3zg"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# importing relevant libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy as sp\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
    "from sklearn.model_selection import cross_val_predict, StratifiedKFold\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, auc#plot_precision_recall_curve\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from tqdm import tqdm\n",
    "from umap import UMAP\n",
    "from pynndescent import NNDescent\n",
    "from fastcluster import single\n",
    "from scipy.cluster.hierarchy import cut_tree, fcluster, dendrogram\n",
    "from scipy.spatial.distance import squareform\n",
    "from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier\n",
    "from pims import ImageSequence\n",
    "from PIL import Image\n",
    "from scipy.spatial.distance import hamming\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# turning off automatic plot showing, and setting style\n",
    "plt.style.use('bmh')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 14,
     "status": "ok",
     "timestamp": 1693776879316,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "8iNq7_xZ6WJK",
    "outputId": "ddee8a99-1a3e-45dd-f866-5319b906a9d2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cpu\n"
     ]
    }
   ],
   "source": [
    "if torch.cuda.is_available():\n",
    "    device = torch.device('cuda')\n",
    "else:\n",
    "    device = torch.device('cpu')\n",
    "\n",
    "print(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HrjCzoe8OWqH"
   },
   "outputs": [],
   "source": [
    "# define the NN architecture\n",
    "class ConvAutoencoder(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(ConvAutoencoder, self).__init__()\n",
    "        ## encoder layers ##\n",
    "        # conv layer (depth from 1 --> 16), 3x3 kernels\n",
    "        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)\n",
    "        # conv layer (depth from 16 --> 4), 3x3 kernels\n",
    "        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)\n",
    "        # pooling layer to reduce x-y dims by two; kernel and stride of 2\n",
    "        self.pool = nn.MaxPool2d(2, 2)\n",
    "\n",
    "        ## decoder layers ##\n",
    "        ## a kernel of 2 and a stride of 2 will increase the spatial dims by 2\n",
    "        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)\n",
    "        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)\n",
    "\n",
    "\n",
    "    def forward(self, x):\n",
    "        ## encode ##\n",
    "        # add hidden layers with relu activation function\n",
    "        # and maxpooling after\n",
    "        x = F.relu(self.conv1(x))\n",
    "        x = self.pool(x)\n",
    "        # add second hidden layer\n",
    "        x = F.relu(self.conv2(x))\n",
    "        x = self.pool(x)  # compressed representation\n",
    "\n",
    "        ## decode ##\n",
    "        # add transpose conv layers, with relu activation function\n",
    "        x = F.relu(self.t_conv1(x))\n",
    "        # output layer (with sigmoid for scaling from 0 to 1)\n",
    "        x = F.sigmoid(self.t_conv2(x))\n",
    "\n",
    "        return x\n",
    "\n",
    "    def forward_encoder(self, x):\n",
    "        x = F.relu(self.conv1(x))\n",
    "        x = self.pool(x)\n",
    "        # add second hidden layer\n",
    "        x = F.relu(self.conv2(x))\n",
    "        x = self.pool(x)  # compressed representation\n",
    "\n",
    "        return x\n",
    "\n",
    "\n",
    "# Training function\n",
    "def train(model, train_loader, criterion, optimizer, num_epochs=10, online = False):\n",
    "    model.train()\n",
    "    for epoch in range(num_epochs):\n",
    "        running_loss = 0.0\n",
    "        if not online:\n",
    "            for images in train_loader:\n",
    "                noisy_images = images #+ torch.randn(images.size()) * 0.1  # Adding Gaussian noise\n",
    "                noisy_images = torchvision.transforms.Grayscale(num_output_channels=1)(noisy_images)\n",
    "                optimizer.zero_grad()\n",
    "                outputs = model(noisy_images)\n",
    "                loss = criterion(outputs, images)\n",
    "                loss.backward()\n",
    "                optimizer.step()\n",
    "                running_loss += loss.item()\n",
    "        else:\n",
    "            for images, _ in train_loader:\n",
    "                noisy_images = images #+ torch.randn(images.size()) * 0.1  # Adding Gaussian noise\n",
    "                noisy_images = torchvision.transforms.Grayscale(num_output_channels=1)(noisy_images)\n",
    "                optimizer.zero_grad()\n",
    "                outputs = model(noisy_images)\n",
    "                loss = criterion(outputs, images)\n",
    "                loss.backward()\n",
    "                optimizer.step()\n",
    "                running_loss += loss.item()\n",
    "        print(f\"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader)}\")\n",
    "\n",
    "# Data preprocessing\n",
    "transform = transforms.Compose([\n",
    "    transforms.ToTensor(),\n",
    "])\n",
    "\n",
    "# Initialize the model, criterion, and optimizer\n",
    "model = ConvAutoencoder()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 574044,
     "status": "ok",
     "timestamp": 1693777453529,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "Om9z7Bi4O-sW",
    "outputId": "d622c36e-7c97-4878-d3f0-5a17f4ab8089"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [1/30], Loss: 0.03625135279115615\n",
      "Epoch [2/30], Loss: 0.016316385701425802\n",
      "Epoch [3/30], Loss: 0.015162050212695719\n",
      "Epoch [4/30], Loss: 0.014537293906770409\n",
      "Epoch [5/30], Loss: 0.014152994614714054\n",
      "Epoch [6/30], Loss: 0.013855012639094073\n",
      "Epoch [7/30], Loss: 0.013634108850903222\n",
      "Epoch [8/30], Loss: 0.013412454854577843\n",
      "Epoch [9/30], Loss: 0.013118600056790657\n",
      "Epoch [10/30], Loss: 0.012922098417319595\n",
      "Epoch [11/30], Loss: 0.012769461126882892\n",
      "Epoch [12/30], Loss: 0.012638427517704491\n",
      "Epoch [13/30], Loss: 0.012526669340934962\n",
      "Epoch [14/30], Loss: 0.012376039483542763\n",
      "Epoch [15/30], Loss: 0.01217763080819647\n",
      "Epoch [16/30], Loss: 0.012001476567318\n",
      "Epoch [17/30], Loss: 0.011808889175354163\n",
      "Epoch [18/30], Loss: 0.011656046124187105\n",
      "Epoch [19/30], Loss: 0.01153611495538847\n",
      "Epoch [20/30], Loss: 0.011440959183185467\n",
      "Epoch [21/30], Loss: 0.011363453461107478\n",
      "Epoch [22/30], Loss: 0.011291656083167234\n",
      "Epoch [23/30], Loss: 0.011221084840047652\n",
      "Epoch [24/30], Loss: 0.011177602104906207\n",
      "Epoch [25/30], Loss: 0.01113684401948696\n",
      "Epoch [26/30], Loss: 0.011106442620378059\n",
      "Epoch [27/30], Loss: 0.011080778210854797\n",
      "Epoch [28/30], Loss: 0.011044674044621906\n",
      "Epoch [29/30], Loss: 0.01101641854477812\n",
      "Epoch [30/30], Loss: 0.010980736685674519\n"
     ]
    }
   ],
   "source": [
    "# Load CIFAR-10 data\n",
    "train_dataset = torchvision.datasets.FashionMNIST(root=\".FashionMNIST/train\", train=True, transform=transform, download=True)\n",
    "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)\n",
    "\n",
    "# Training the model\n",
    "train(model, train_loader, criterion, optimizer, num_epochs=30, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 27584,
     "status": "ok",
     "timestamp": 1693777481084,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "bh3Zu2QWO-uq",
    "outputId": "3258a1da-6a47-4d2e-d25e-59db57fce08d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'torchvision.datasets.mnist.FashionMNIST'>\n",
      "(60000, 1, 28, 28)\n",
      "(60000, 4, 7, 7)\n",
      "(60000,)\n"
     ]
    }
   ],
   "source": [
    "train_dataset = torchvision.datasets.FashionMNIST(root=\".FashionMNIST/train\", train=True, transform=transform, download=True)\n",
    "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 60000, shuffle=True)\n",
    "\n",
    "print(type(train_dataset))\n",
    "\n",
    "for batch in train_loader:\n",
    "    img, labels = batch\n",
    "    #img = img.reshape(-1, 28*28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_train = model.forward_encoder(img)\n",
    "\n",
    "X_train = img.detach().numpy()\n",
    "latent_train = latent_train.detach().numpy()\n",
    "y = labels.detach().numpy()\n",
    "print(X_train.shape)\n",
    "print(latent_train.shape)\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1087,
     "status": "ok",
     "timestamp": 1693777482164,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "ssO_Le6NP6HZ",
    "outputId": "14ab6ab1-cd25-4002-9869-74c820564875"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(60000, 784)\n",
      "(60000, 196)\n",
      "0.015891340656571416\n",
      "0.06602905856292143\n"
     ]
    }
   ],
   "source": [
    "X_train = X_train.reshape(-1,28*28)\n",
    "latent_train = latent_train.reshape(-1,4*7*7)\n",
    "print(X_train.shape)\n",
    "print(latent_train.shape)\n",
    "print(np.mean(np.cov(X_train.T)))\n",
    "print(np.mean(np.cov(latent_train.T)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1a0drLZwEv9u"
   },
   "outputs": [],
   "source": [
    "# y = y[:2400]\n",
    "# latent_train = latent_train[:2400,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 13,
     "status": "ok",
     "timestamp": 1693777537105,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "ZToawVOAkOGV",
    "outputId": "5fddc55b-7719-4db9-d875-44252c32f7a7"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 6 9 ... 1 5 5]\n"
     ]
    }
   ],
   "source": [
    "# np.random.shuffle(y)\n",
    "# print(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "k3hp5R2cQCD9"
   },
   "source": [
    "# Tree Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 10065,
     "status": "ok",
     "timestamp": 1693777547164,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "R3tnvjyuQKpq",
    "outputId": "612bdede-92cf-424d-c22d-8086193c03cc"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Area under the ROC Curve: 0.49962070129324904\n"
     ]
    }
   ],
   "source": [
    "et = ExtraTreesClassifier(n_estimators=500, min_samples_leaf=100,\n",
    "                          max_features='sqrt', bootstrap=True, class_weight='balanced', n_jobs=-1)\n",
    "\n",
    "# et = RandomForestClassifier(n_estimators=500, min_samples_leaf=100,\n",
    "#                           max_features='sqrt', bootstrap=True, class_weight='balanced', n_jobs=-1)\n",
    "\n",
    "# validation instance\n",
    "skf = StratifiedKFold(n_splits=5, shuffle=True)\n",
    "\n",
    "# getting the model validation predictions\n",
    "preds = cross_val_predict(et, latent_train, y, cv=skf, method='predict_proba')\n",
    "\n",
    "# evaluating the model\n",
    "print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 92
    },
    "executionInfo": {
     "elapsed": 1905,
     "status": "ok",
     "timestamp": 1693777549046,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "T2zHZflUQlOr",
    "outputId": "236a1931-e454-47a2-81dc-d72e48355305"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-20 {color: black;background-color: white;}#sk-container-id-20 pre{padding: 0;}#sk-container-id-20 div.sk-toggleable {background-color: white;}#sk-container-id-20 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-20 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-20 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-20 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-20 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-20 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-20 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-20 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-20 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-20 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-20 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-20 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-20 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-20 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-20 div.sk-item {position: relative;z-index: 1;}#sk-container-id-20 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-20 div.sk-item::before, #sk-container-id-20 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-20 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-20 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-20 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-20 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-20 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-20 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-20 div.sk-label-container {text-align: center;}#sk-container-id-20 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-20 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-20\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" checked><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ExtraTreesClassifier</label><div class=\"sk-toggleable__content\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)"
      ]
     },
     "execution_count": 1898,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "et.fit(latent_train,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4022,
     "status": "ok",
     "timestamp": 1693777553065,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "6tMnYzZ_VPZR",
    "outputId": "43ab2863-957c-41e6-d4a8-d0b5d1512328"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2400, 500)\n",
      "[[19 11 17 ...  3 15 16]\n",
      " [ 4 10  7 ...  7  9 16]\n",
      " [19  4  6 ... 11 14 11]\n",
      " ...\n",
      " [14 16 13 ...  7  9 12]\n",
      " [ 3  4  6 ... 11 14 11]\n",
      " [14 15  2 ...  6 16  4]]\n",
      "0.8937245531062129\n",
      "4.0600590192059007e-05\n"
     ]
    }
   ],
   "source": [
    "leaves_train = et.apply(latent_train)\n",
    "print(leaves_train.shape)\n",
    "print(leaves_train)\n",
    "\n",
    "distances_train = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_train[i,j] = hamming(leaves_train[i,:], leaves_train[j,:])\n",
    "\n",
    "score_train = sum(distances_train)/499\n",
    "\n",
    "print(np.mean(score_train))\n",
    "print(np.cov(score_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Q55SNuDNhQQ1"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# torch.save(model.state_dict(), path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0hpY3jsdhQUb"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "U6SnpaEVWOQf"
   },
   "source": [
    "# Testing on ID Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "R6olj5rRmTpo"
   },
   "outputs": [],
   "source": [
    "#num_epoch = 30"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BLgLZ7STVPbU"
   },
   "outputs": [],
   "source": [
    "# test_dataset_MNIST = torchvision.datasets.MNIST(root=\".MNIST/test\", train=False, transform=transform, download=True)\n",
    "# test_loader_MNIST = torch.utils.data.DataLoader(test_dataset_MNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_MNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3860,
     "status": "ok",
     "timestamp": 1693777556922,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "zM65LrAhVPda",
    "outputId": "2f60cae6-7d4c-4935-a030-dca735a45842"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 1, 28, 28)\n",
      "(10000, 4, 7, 7)\n",
      "(10000, 784)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "test_dataset_MNIST = torchvision.datasets.FashionMNIST(root=\".FashionMNIST/test\", train=False, transform=transform, download=True)\n",
    "test_loader_MNIST = torch.utils.data.DataLoader(test_dataset_MNIST, batch_size = 10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_MNIST:\n",
    "    img, _ = batch\n",
    "    #img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "    #img = img.reshape(-1, 28*28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test = latent_test.detach().numpy()\n",
    "X_test = img.detach().numpy()\n",
    "print(X_test.shape)\n",
    "print(latent_test.shape)\n",
    "X_test = X_test.reshape(-1,1*28*28)\n",
    "latent_test = latent_test.reshape(-1,4*7*7)\n",
    "print(X_test.shape)\n",
    "print(latent_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4548,
     "status": "ok",
     "timestamp": 1693777561460,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "5l77BQBXVPfP",
    "outputId": "530ad57d-110f-4f8c-a0b5-d5458165cb2c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 9  8  7 ...  3  6  1]\n",
      " [13 15  2 ...  6  9  4]\n",
      " [ 9 15 10 ...  6  9  5]\n",
      " ...\n",
      " [16  8  8 ...  7  8 19]\n",
      " [14 15  8 ...  7  8 13]\n",
      " [13  4  2 ...  8 14  1]]\n",
      "0.8931809699398802\n",
      "3.362444956734138e-05\n"
     ]
    }
   ],
   "source": [
    "latent_test_in = latent_test\n",
    "\n",
    "leaves_test_in = et.apply(latent_test_in)\n",
    "print(leaves_test_in.shape)\n",
    "print(leaves_test_in)\n",
    "\n",
    "distances_test_in = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_in[i,j] = hamming(leaves_test_in[i,:], leaves_test_in[j,:])\n",
    "\n",
    "score_test_in = sum(distances_test_in)/499\n",
    "print(np.mean(score_test_in))\n",
    "print(np.cov(score_test_in))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zKHeKNSXWZQr"
   },
   "source": [
    "# Testing on OOD Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DQgAh1gaXxP6"
   },
   "source": [
    "## MNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DTO-dHR3hkcS"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 690,
     "status": "ok",
     "timestamp": 1693777562131,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "U6-VmvGdWLPA",
    "outputId": "4a1a8d8a-7c96-471c-e5d2-d30c30f19e6d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "df_test_out = pd.read_csv('/content/gdrive/MyDrive/TOOD/datasets/mnist_test.csv')\n",
    "data_test_out = np.array(df_test_out)\n",
    "\n",
    "X_test_out = data_test_out[:, 1:785]/255\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Fashion = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Fashion, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 2452,
     "status": "ok",
     "timestamp": 1693777564580,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "G-T51piGWLRU",
    "outputId": "65aeede2-fbb9-41b0-9a12-b9188a780365"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Fashion = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Fashion:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Fashion = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Fashion.shape)\n",
    "latent_test_out_Fashion = latent_test_out_Fashion.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Fashion.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6388,
     "status": "ok",
     "timestamp": 1693777570965,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "RMYCE2rXYeqT",
    "outputId": "40953f23-9947-43f5-9c9b-e9f0eea7e2f6"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 6 11 18 ... 13 14 16]\n",
      " [ 6  4 16 ... 14 13 16]\n",
      " [19  4 17 ... 14  5 16]\n",
      " ...\n",
      " [ 7  4 18 ... 14 14 16]\n",
      " [ 6  8 16 ... 14 14 19]\n",
      " [18  4 13 ... 14  5 16]]\n",
      "0.6914437194388777\n",
      "0.002269430331407682\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Fashion = et.apply(latent_test_out_Fashion)\n",
    "\n",
    "print(leaves_test_out_Fashion.shape)\n",
    "print(leaves_test_out_Fashion)\n",
    "\n",
    "distances_test_out_Fashion = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Fashion[i,j] = hamming(leaves_test_out_Fashion[i,:], leaves_test_out_Fashion[j,:])\n",
    "\n",
    "score_test_out_Fashion = sum(distances_test_out_Fashion)/499\n",
    "\n",
    "print(np.mean(score_test_out_Fashion))\n",
    "print(np.cov(score_test_out_Fashion))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hNZJuuwNO5AV"
   },
   "source": [
    "## BreastMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "I8rDNw7QO6U5"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TMX1QtM7O6cF"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/breastmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['train_images'].shape\n",
    "X_test_out = df['train_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 124,
     "status": "ok",
     "timestamp": 1693777571310,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "5_rUSplcO6eF",
    "outputId": "33340385-f6ee-4226-fffe-480ead3140d4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "546\n",
      "(546, 4, 7, 7)\n",
      "(546, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['train_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_BreastMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_BreastMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Breast = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Breast.shape)\n",
    "latent_test_out_Breast = latent_test_out_Breast.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Breast.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 7082,
     "status": "ok",
     "timestamp": 1693777578390,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "XJ8p4wDAO6f4",
    "outputId": "c9e563e5-ccb9-48a7-fd0b-371decf0c59b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(546, 500)\n",
      "[[18 11 13 ... 14 16  5]\n",
      " [16 10 16 ...  7  6  1]\n",
      " [19 10 16 ... 14  6  5]\n",
      " ...\n",
      " [16 10 16 ...  7  6  1]\n",
      " [18 10 16 ... 11 16  1]\n",
      " [18 11 16 ... 11 16  1]]\n",
      "0.457059254509018\n",
      "0.003879313033436223\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Breast = et.apply(latent_test_out_Breast)\n",
    "\n",
    "print(leaves_test_out_Breast.shape)\n",
    "print(leaves_test_out_Breast)\n",
    "\n",
    "distances_test_out_Breast = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Breast[i,j] = hamming(leaves_test_out_Breast[i,:], leaves_test_out_Breast[j,:])\n",
    "\n",
    "score_test_out_Breast = sum(distances_test_out_Breast)/499\n",
    "\n",
    "print(np.mean(score_test_out_Breast))\n",
    "print(np.cov(score_test_out_Breast))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1vkCzIAMy5Yi"
   },
   "source": [
    "## ChestMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-I7QZNVUhnpI"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3Nh3znXBv1za"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/chestmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5409,
     "status": "ok",
     "timestamp": 1693777584226,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "FFfdi--zv12B",
    "outputId": "5c04b56a-f29e-434b-fff4-21143d95687d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22433\n",
      "(22433, 4, 7, 7)\n",
      "(22433, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_ChestMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Chest = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Chest.shape)\n",
    "latent_test_out_Chest = latent_test_out_Chest.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Chest.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4121,
     "status": "ok",
     "timestamp": 1693777588343,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "hLfKbVMzzxUi",
    "outputId": "7a22ec1d-aed1-426f-e573-b2f005459773"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(22433, 500)\n",
      "[[18  8 11 ... 11 16  4]\n",
      " [14 10 10 ... 11 16  1]\n",
      " [14  8 10 ...  7  6  4]\n",
      " ...\n",
      " [19 18 11 ... 11 16  5]\n",
      " [13  8 10 ...  6  6  1]\n",
      " [19  8 11 ... 11 16  4]]\n",
      "0.4444889298597194\n",
      "0.004354740913215967\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Chest = et.apply(latent_test_out_Chest)\n",
    "\n",
    "print(leaves_test_out_Chest.shape)\n",
    "print(leaves_test_out_Chest)\n",
    "\n",
    "distances_test_out_Chest = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Chest[i,j] = hamming(leaves_test_out_Chest[i,:], leaves_test_out_Chest[j,:])\n",
    "\n",
    "score_test_out_Chest = sum(distances_test_out_Chest)/499\n",
    "\n",
    "print(np.mean(score_test_out_Chest))\n",
    "print(np.cov(score_test_out_Chest))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Kuwv43MpPUjD"
   },
   "source": [
    "## OCTMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MI5QlskOPWn1"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xIQbJtWjPWp8"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/octmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 185,
     "status": "ok",
     "timestamp": 1693777588525,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "KKOyE9byPWr-",
    "outputId": "655ec572-2df4-4c9c-dd66-8d00d92b925b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000\n",
      "(1000, 4, 7, 7)\n",
      "(1000, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_OctMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_OctMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Oct = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Oct.shape)\n",
    "latent_test_out_Oct = latent_test_out_Oct.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Oct.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4310,
     "status": "ok",
     "timestamp": 1693777592989,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "yzjA0YdpPWtk",
    "outputId": "0f972f92-01f1-4f53-a512-c2d49779f5d1"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1000, 500)\n",
      "[[18 10 16 ... 14 16  5]\n",
      " [ 7 11 16 ... 14 16  5]\n",
      " [18 11 16 ... 14 16  5]\n",
      " ...\n",
      " [18  5 16 ... 14 16  5]\n",
      " [ 4 11 16 ... 14 16  5]\n",
      " [ 4 10 16 ... 14 16 13]]\n",
      "0.4147494188376754\n",
      "0.005415355122011038\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Oct = et.apply(latent_test_out_Oct)\n",
    "\n",
    "print(leaves_test_out_Oct.shape)\n",
    "print(leaves_test_out_Oct)\n",
    "\n",
    "distances_test_out_Oct = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Oct[i,j] = hamming(leaves_test_out_Oct[i,:], leaves_test_out_Oct[j,:])\n",
    "\n",
    "score_test_out_Oct = sum(distances_test_out_Oct)/499\n",
    "\n",
    "print(np.mean(score_test_out_Oct))\n",
    "print(np.cov(score_test_out_Oct))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_Tguoeb3Nosv"
   },
   "source": [
    "## PneumoniaMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "yhageUDChpGt"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SJN2w-zVzxWm"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/pneumoniamnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['train_images'].shape\n",
    "X_test_out = df['train_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Pneum = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Pneum, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1240,
     "status": "ok",
     "timestamp": 1693777594359,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "zmGFQYZvNtiL",
    "outputId": "042e5c30-ac3b-400d-def8-bd0f62d95557"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4708\n",
      "(4708, 4, 7, 7)\n",
      "(4708, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['train_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Pneum = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Pneum:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Pneum = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Pneum.shape)\n",
    "latent_test_out_Pneum = latent_test_out_Pneum.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Pneum.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3812,
     "status": "ok",
     "timestamp": 1693777598168,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "9SuNE4aSNt7b",
    "outputId": "796564ed-f384-4895-fa97-86ffa9a7de0f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4708, 500)\n",
      "[[14  8 10 ...  6  6  1]\n",
      " [18  8 10 ...  6  4  1]\n",
      " [16  8 10 ...  7  6  1]\n",
      " ...\n",
      " [14  8 10 ...  7  4  1]\n",
      " [14  8 10 ...  6  4  1]\n",
      " [16  8 10 ...  6  4  1]]\n",
      "0.3365375070140281\n",
      "0.0037587742533124052\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Pneum = et.apply(latent_test_out_Pneum)\n",
    "\n",
    "print(leaves_test_out_Pneum.shape)\n",
    "print(leaves_test_out_Pneum)\n",
    "\n",
    "distances_test_out_Pneum = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Pneum[i,j] = hamming(leaves_test_out_Pneum[i,:], leaves_test_out_Pneum[j,:])\n",
    "\n",
    "score_test_out_Pneum = sum(distances_test_out_Pneum)/499\n",
    "\n",
    "print(np.mean(score_test_out_Pneum))\n",
    "print(np.cov(score_test_out_Pneum))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "o1iY4vTXPlN-"
   },
   "source": [
    "## OrganAMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Eg8cWRsQPmdX"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "sQSXQXVxPmf6"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organamnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4020,
     "status": "ok",
     "timestamp": 1693777602518,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "-hl_Di8dPmhQ",
    "outputId": "f2c7f2a6-8873-4ece-940b-bd091cabd8b3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17778\n",
      "(17778, 4, 7, 7)\n",
      "(17778, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organa = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organa:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organa = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organa.shape)\n",
    "latent_test_out_Organa = latent_test_out_Organa.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organa.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4363,
     "status": "ok",
     "timestamp": 1693777606859,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "QTVnysAIPmjU",
    "outputId": "bd355c55-37df-4a56-b4e2-c1fe3f5944f9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(17778, 500)\n",
      "[[20 10 16 ... 14  6  1]\n",
      " [16 11 17 ... 14 16  1]\n",
      " [16  8  2 ...  7 16  1]\n",
      " ...\n",
      " [ 7 11 16 ... 14 16  5]\n",
      " [16  8 10 ...  7  6  1]\n",
      " [13 10 10 ...  6  6  1]]\n",
      "0.622519246492986\n",
      "0.007519940942672991\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organa = et.apply(latent_test_out_Organa)\n",
    "\n",
    "print(leaves_test_out_Organa.shape)\n",
    "print(leaves_test_out_Organa)\n",
    "\n",
    "distances_test_out_Organa = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organa[i,j] = hamming(leaves_test_out_Organa[i,:], leaves_test_out_Organa[j,:])\n",
    "\n",
    "score_test_out_Organa = sum(distances_test_out_Organa)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organa))\n",
    "print(np.cov(score_test_out_Organa))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UEKwndGfCmv6"
   },
   "source": [
    "## OrganCMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "J1GLB1jDh3uE"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6OlXP64YNuYF"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organcmnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1979,
     "status": "ok",
     "timestamp": 1693777608965,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "GneANgXlNuoo",
    "outputId": "81eb0955-35a9-4936-daee-e89da545f8b5"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8268\n",
      "(8268, 4, 7, 7)\n",
      "(8268, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organc:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organc = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organc.shape)\n",
    "latent_test_out_Organc = latent_test_out_Organc.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organc.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4102,
     "status": "ok",
     "timestamp": 1693777613063,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "poQYWGAOC5gL",
    "outputId": "a14871e3-4dd8-4380-c048-bd38e2ba5fd0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8268, 500)\n",
      "[[20  5 16 ... 14 16  1]\n",
      " [18 10 13 ...  7  4  1]\n",
      " [ 9  8 10 ...  6  6  1]\n",
      " ...\n",
      " [16 11 13 ...  7  6  1]\n",
      " [16 11 16 ...  6 16  1]\n",
      " [14 10 10 ...  7  6  1]]\n",
      "0.6102598957915831\n",
      "0.007140277156151874\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organc = et.apply(latent_test_out_Organc)\n",
    "\n",
    "print(leaves_test_out_Organc.shape)\n",
    "print(leaves_test_out_Organc)\n",
    "\n",
    "distances_test_out_Organc = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organc[i,j] = hamming(leaves_test_out_Organc[i,:], leaves_test_out_Organc[j,:])\n",
    "\n",
    "score_test_out_Organc = sum(distances_test_out_Organc)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organc))\n",
    "print(np.cov(score_test_out_Organc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "S9qWBt2uP5Iq"
   },
   "source": [
    "## OrganSMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hUVdNi9sP6ma"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fdtvZMXhP6od"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organsmnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 2062,
     "status": "ok",
     "timestamp": 1693777615252,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "RhHM9LdQP6qW",
    "outputId": "219d6236-500d-4295-d562-5c62ab4b2dc4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8829\n",
      "(8829, 4, 7, 7)\n",
      "(8829, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organs = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organs:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organs = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organs.shape)\n",
    "latent_test_out_Organs = latent_test_out_Organs.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organs.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4989,
     "status": "ok",
     "timestamp": 1693777620237,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "0SJuP7e4P6sI",
    "outputId": "aac30369-0906-4da2-d288-b5f2f6a9f5ca"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8829, 500)\n",
      "[[19  8  2 ...  7  6  1]\n",
      " [20 18 17 ... 14  6  1]\n",
      " [16  8 11 ...  7  4  1]\n",
      " ...\n",
      " [18  3 18 ...  8 16  5]\n",
      " [13  8  2 ...  6  6  1]\n",
      " [14  8 10 ...  7  6  1]]\n",
      "0.6127244248496994\n",
      "0.0073032816240303725\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organs = et.apply(latent_test_out_Organs)\n",
    "\n",
    "print(leaves_test_out_Organs.shape)\n",
    "print(leaves_test_out_Organs)\n",
    "\n",
    "distances_test_out_Organs = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organs[i,j] = hamming(leaves_test_out_Organs[i,:], leaves_test_out_Organs[j,:])\n",
    "\n",
    "score_test_out_Organs = sum(distances_test_out_Organs)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organs))\n",
    "print(np.cov(score_test_out_Organs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PK_cY3XVDxlQ"
   },
   "source": [
    "## TissueMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rXhtk9Mah8mR"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ViCtSF5iC5ie"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/tissuemnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Tissue = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Tissue, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 10562,
     "status": "ok",
     "timestamp": 1693777631648,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "iSIHWqqOC5kT",
    "outputId": "988f9082-ec2e-4bbb-ffba-6dc89a7009c9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "47280\n",
      "(47280, 4, 7, 7)\n",
      "(47280, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Tissue = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Tissue:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Tissue = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Tissue.shape)\n",
    "latent_test_out_Tissue = latent_test_out_Tissue.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Tissue.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5276,
     "status": "ok",
     "timestamp": 1693777636920,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "IUQ3nT57Egcv",
    "outputId": "f7f56120-f17d-485f-e3fa-3f9fc90dd3db"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(47280, 500)\n",
      "[[ 7  5 16 ... 14 15  5]\n",
      " [ 7 11 16 ... 14 16  5]\n",
      " [ 7 11 16 ... 13  5  1]\n",
      " ...\n",
      " [ 7  5 16 ... 14 13  5]\n",
      " [18  5 17 ... 14 13  5]\n",
      " [ 7  5 16 ... 14 13  5]]\n",
      "0.4205789659318637\n",
      "0.004957656652298349\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Tissue = et.apply(latent_test_out_Tissue)\n",
    "\n",
    "print(leaves_test_out_Tissue.shape)\n",
    "print(leaves_test_out_Tissue)\n",
    "\n",
    "distances_test_out_Tissue = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Tissue[i,j] = hamming(leaves_test_out_Tissue[i,:], leaves_test_out_Tissue[j,:])\n",
    "\n",
    "score_test_out_Tissue = sum(distances_test_out_Tissue)/499\n",
    "\n",
    "print(np.mean(score_test_out_Tissue))\n",
    "print(np.cov(score_test_out_Tissue))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "V9pDbbT2Q9xv"
   },
   "source": [
    "## KMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "e7-UM77Fh-w-"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4BiRKanXxvFv"
   },
   "outputs": [],
   "source": [
    "# test_dataset_KMNIST = torchvision.datasets.KMNIST(root=\"./data\", train=False, transform=transform, download=True)\n",
    "# test_loader_KMNIST = torch.utils.data.DataLoader(test_dataset_KMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_KMNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4016,
     "status": "ok",
     "timestamp": 1693777640915,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "IrDPmFlTxvH4",
    "outputId": "7a17e4f4-4527-46c4-f039-774cad48421b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_KMNIST = torchvision.datasets.KMNIST(root='./data', train=False, download=True, transform=transform)\n",
    "test_loader_KMNIST = torch.utils.data.DataLoader(test_dataset_KMNIST, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_KMNIST:\n",
    "    img, _ = batch\n",
    "    #img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_KMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_KMNIST = latent_test_out_KMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_KMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4616,
     "status": "ok",
     "timestamp": 1693777645528,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "WqFrdnepRkI7",
    "outputId": "b9c800ef-90a0-448b-c35b-d744e1c1f69e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 6  8  7 ... 14  5 19]\n",
      " [ 4 10 13 ...  8 13 16]\n",
      " [ 7  4  6 ... 14  9 20]\n",
      " ...\n",
      " [ 7 11 13 ... 14  8  5]\n",
      " [20 18 17 ... 14  9 13]\n",
      " [19  5 13 ... 12 13 13]]\n",
      "0.8213646012024048\n",
      "0.0012461203126889625\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_KMNIST = et.apply(latent_test_out_KMNIST)\n",
    "\n",
    "print(leaves_test_out_KMNIST.shape)\n",
    "print(leaves_test_out_KMNIST)\n",
    "\n",
    "distances_test_out_KMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_KMNIST[i,j] = hamming(leaves_test_out_KMNIST[i,:], leaves_test_out_KMNIST[j,:])\n",
    "\n",
    "score_test_out_KMNIST = sum(distances_test_out_KMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_KMNIST))\n",
    "print(np.cov(score_test_out_KMNIST))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "brj6eAE_SJn2"
   },
   "source": [
    "## QMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dIaTRR3kif-b"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1iG0aHi7xvJH"
   },
   "outputs": [],
   "source": [
    "# test_dataset_QMNIST = torchvision.datasets.QMNIST(root=\"./data\", train=False, transform=transform, download=True)\n",
    "# test_loader_QMNIST = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_QMNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 22810,
     "status": "ok",
     "timestamp": 1693777668319,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "gXmIqYEnSMZs",
    "outputId": "9069e4fb-6e2e-4449-c9a9-4418bfd121f3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_QMNIST = torchvision.datasets.QMNIST(root='./data', train=False, download=True, transform=transform)\n",
    "test_loader_QMNIST = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_QMNIST:\n",
    "    img, _ = batch\n",
    "    #img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_QMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_QMNIST = latent_test_out_QMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_QMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4383,
     "status": "ok",
     "timestamp": 1693777672675,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "OW51VvQQSMbw",
    "outputId": "743fb4e3-7634-44df-f004-8eee57bbebd8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 4 11 16 ... 13 13 20]\n",
      " [19  5 13 ... 14  5 16]\n",
      " [19  4 16 ... 14 14  7]\n",
      " ...\n",
      " [ 4  4 16 ... 13 14 16]\n",
      " [19  4  8 ... 14  5 19]\n",
      " [ 7 11 16 ... 14 14 16]]\n",
      "0.6896508376753506\n",
      "0.002805395450891807\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_QMNIST = et.apply(latent_test_out_QMNIST)\n",
    "\n",
    "print(leaves_test_out_QMNIST.shape)\n",
    "print(leaves_test_out_QMNIST)\n",
    "\n",
    "distances_test_out_QMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_QMNIST[i,j] = hamming(leaves_test_out_QMNIST[i,:], leaves_test_out_QMNIST[j,:])\n",
    "\n",
    "score_test_out_QMNIST = sum(distances_test_out_QMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_QMNIST))\n",
    "print(np.cov(score_test_out_QMNIST))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "iKdP-64sRyQ8"
   },
   "source": [
    "## Omniglot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GNAQF4F4R9JQ"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bQdeByYMSFYz"
   },
   "outputs": [],
   "source": [
    "# test_dataset_Omniglot = torchvision.datasets.Omniglot(root='./data', download=True, transform=transform)\n",
    "# test_loader_Omniglot = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Omniglot, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 12389,
     "status": "ok",
     "timestamp": 1693777685043,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "rOznxhcjSMd0",
    "outputId": "074f4dd6-2bd9-4ee7-fad5-92c17f812d50"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Files already downloaded and verified\n",
      "(19280, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_Omniglot = torchvision.datasets.Omniglot(root='./data', download=True, transform=transform)\n",
    "\n",
    "# Creating Dataloaders from the\n",
    "# training and testing dataset\n",
    "test_loader_Omniglot = torch.utils.data.DataLoader(test_dataset_Omniglot, batch_size=32460, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Omniglot:\n",
    "    img, _ = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Omniglot = latent_test_out.detach().numpy()\n",
    "latent_test_out_Omniglot = latent_test_out_Omniglot.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Omniglot.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4071,
     "status": "ok",
     "timestamp": 1693777689089,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "2l0MVvcaSMfp",
    "outputId": "3e1b3eba-4cef-4de0-94df-fde985d5c16b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(19280, 500)\n",
      "[[ 9  8 10 ...  6  6  1]\n",
      " [ 9  8  2 ...  6  4  1]\n",
      " [13 10 10 ...  6 16  1]\n",
      " ...\n",
      " [13  8  2 ...  6  6  1]\n",
      " [13 10 10 ...  6  6  1]\n",
      " [14 10 10 ... 11  6  1]]\n",
      "0.20521898196392788\n",
      "0.0007923738987085201\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Omniglot = et.apply(latent_test_out_Omniglot)\n",
    "\n",
    "print(leaves_test_out_Omniglot.shape)\n",
    "print(leaves_test_out_Omniglot)\n",
    "\n",
    "distances_test_out_Omniglot = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Omniglot[i,j] = hamming(leaves_test_out_Omniglot[i,:], leaves_test_out_Omniglot[j,:])\n",
    "\n",
    "score_test_out_Omniglot = sum(distances_test_out_Omniglot)/499\n",
    "\n",
    "print(np.mean(score_test_out_Omniglot))\n",
    "print(np.cov(score_test_out_Omniglot))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "u9OZcvTSXkPh"
   },
   "source": [
    "## Cifar10bw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QfMu7Z3kXol1"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Ej9JmeDhX0NG"
   },
   "outputs": [],
   "source": [
    "# test_dataset_Cifar10bw = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n",
    "# test_loader_Cifar10bw = torch.utils.data.DataLoader(test_dataset_Cifar10bw, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Cifar10bw, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6222,
     "status": "ok",
     "timestamp": 1693777695290,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "96LUcE1kXon3",
    "outputId": "6cc3e1d7-a466-4a5b-b7d2-518902b4e837"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Files already downloaded and verified\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_Cifar10bw = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n",
    "\n",
    "# Creating Dataloaders from the\n",
    "# training and testing dataset\n",
    "test_loader_Cifar10bw = torch.utils.data.DataLoader(test_dataset_Cifar10bw, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Cifar10bw:\n",
    "    img, _ = batch\n",
    "    img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Cifar10bw = latent_test_out.detach().numpy()\n",
    "latent_test_out_Cifar10bw = latent_test_out_Cifar10bw.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Cifar10bw.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4418,
     "status": "ok",
     "timestamp": 1693777699690,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "-9ekzdVoYJIH",
    "outputId": "fd4f617c-d58a-4cc2-f6a2-e179728decf9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[16 10 13 ...  6  6  4]\n",
      " [13 10 16 ...  6 16  1]\n",
      " [16 10 10 ...  6  6  1]\n",
      " ...\n",
      " [19 10 13 ...  7  6  1]\n",
      " [19 11 16 ... 11 16  1]\n",
      " [16 11 10 ...  6 16  1]]\n",
      "0.5091977875751503\n",
      "0.004230844138739862\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Cifar10bw = et.apply(latent_test_out_Cifar10bw)\n",
    "\n",
    "print(leaves_test_out_Cifar10bw.shape)\n",
    "print(leaves_test_out_Cifar10bw)\n",
    "\n",
    "distances_test_out_Cifar10bw = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Cifar10bw[i,j] = hamming(leaves_test_out_Cifar10bw[i,:], leaves_test_out_Cifar10bw[j,:])\n",
    "\n",
    "score_test_out_Cifar10bw = sum(distances_test_out_Cifar10bw)/499\n",
    "\n",
    "print(np.mean(score_test_out_Cifar10bw))\n",
    "print(np.cov(score_test_out_Cifar10bw))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WDCNJ2thdelj"
   },
   "source": [
    "## NotMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hhMEPlqad6cl"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1464,
     "status": "ok",
     "timestamp": 1693777701132,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "7HUmXsizdh0T",
    "outputId": "c8aae812-9608-4839-db83-e33ceca05518"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "from pims import ImageSequence\n",
    "from PIL import Image\n",
    "\n",
    "images_A = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/A/*.png')\n",
    "images_B = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/B/*.png')\n",
    "images_C = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/C/*.png')\n",
    "images_D = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/D/*.png')\n",
    "images_E = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/E/*.png')\n",
    "images_F = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/F/*.png')\n",
    "images_G = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/G/*.png')\n",
    "images_H = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/H/*.png')\n",
    "images_I = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/I/*.png')\n",
    "images_J = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/J/*.png')\n",
    "\n",
    "X_data_A = np.zeros((50, 28**2))\n",
    "X_data_B = np.zeros((50, 28**2))\n",
    "X_data_C = np.zeros((50, 28**2))\n",
    "X_data_D = np.zeros((50, 28**2))\n",
    "X_data_E = np.zeros((50, 28**2))\n",
    "X_data_F = np.zeros((50, 28**2))\n",
    "X_data_G = np.zeros((50, 28**2))\n",
    "X_data_H = np.zeros((50, 28**2))\n",
    "X_data_I = np.zeros((50, 28**2))\n",
    "X_data_J = np.zeros((50, 28**2))\n",
    "\n",
    "for i in range(50):\n",
    "    X_data_A[i,:] = np.reshape(np.array(images_A[i]), (1,28**2))/255\n",
    "    X_data_B[i,:] = np.reshape(np.array(images_B[i]), (1,28**2))/255\n",
    "    X_data_C[i,:] = np.reshape(np.array(images_C[i]), (1,28**2))/255\n",
    "    X_data_D[i,:] = np.reshape(np.array(images_D[i]), (1,28**2))/255\n",
    "    X_data_E[i,:] = np.reshape(np.array(images_E[i]), (1,28**2))/255\n",
    "    X_data_F[i,:] = np.reshape(np.array(images_F[i]), (1,28**2))/255\n",
    "    X_data_G[i,:] = np.reshape(np.array(images_G[i]), (1,28**2))/255\n",
    "    X_data_H[i,:] = np.reshape(np.array(images_H[i]), (1,28**2))/255\n",
    "    X_data_I[i,:] = np.reshape(np.array(images_I[i]), (1,28**2))/255\n",
    "    X_data_J[i,:] = np.reshape(np.array(images_J[i]), (1,28**2))/255\n",
    "\n",
    "\n",
    "X_test_out = np.vstack([X_data_A, X_data_B, X_data_C, X_data_D, X_data_E, X_data_F, X_data_G, X_data_H, X_data_I, X_data_J])\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QtoumAYBdh4P"
   },
   "outputs": [],
   "source": [
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_NotMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_NotMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 10698,
     "status": "ok",
     "timestamp": 1693777711828,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "vqZPvS-3dh7k",
    "outputId": "9a59126d-2a0b-43b3-c352-1c4bfd784243"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7280, 196)\n"
     ]
    }
   ],
   "source": [
    "test_loader_NotMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_NotMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_NotMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_NotMNIST = latent_test_out_NotMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_NotMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4120,
     "status": "ok",
     "timestamp": 1693777715921,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "34usMYYqdh8u",
    "outputId": "6232ea9d-5f1c-418f-ea36-b910090c0dd5"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7280, 500)\n",
      "[[ 7  5 16 ... 14 13  5]\n",
      " [18 11 16 ... 11 16  1]\n",
      " [ 7 11 16 ... 14 16  5]\n",
      " ...\n",
      " [ 7 11 16 ... 13 16  1]\n",
      " [18 11 16 ... 14 16  5]\n",
      " [18  4 16 ... 11 16  5]]\n",
      "0.41262090581162325\n",
      "0.0036183933604043463\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_NotMNIST = et.apply(latent_test_out_NotMNIST)\n",
    "\n",
    "print(leaves_test_out_NotMNIST.shape)\n",
    "print(leaves_test_out_NotMNIST)\n",
    "\n",
    "distances_test_out_NotMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_NotMNIST[i,j] = hamming(leaves_test_out_NotMNIST[i,:], leaves_test_out_NotMNIST[j,:])\n",
    "\n",
    "score_test_out_NotMNIST = sum(distances_test_out_NotMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_NotMNIST))\n",
    "print(np.cov(score_test_out_NotMNIST))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SniWCeDXdh99"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "tamsyBx9xv2t"
   },
   "source": [
    "## Gaussian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "efiYSL3HjJa5"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 887,
     "status": "ok",
     "timestamp": 1693777716787,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "qAcmKgItEge1",
    "outputId": "3772f036-33de-4223-a457-eb8561603f6e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 784)\n",
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "# Gaussian random noise\n",
    "mean_test_out = np.random.uniform(low=0,high=1,size=784)\n",
    "cov_test_out = np.random.rand(784,784)\n",
    "cov_test_out = np.matmul(cov_test_out, cov_test_out.T)\n",
    "X_test_out = np.random.multivariate_normal(mean_test_out, cov_test_out, 10000)\n",
    "X_test_out = MinMaxScaler().fit_transform(X_test_out)\n",
    "print(X_test_out.shape)\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "i1d4sx8MitCF"
   },
   "outputs": [],
   "source": [
    "# Disabled experiment: would wrap the Gaussian noise images in a shuffled\n",
    "# DataLoader (batch size 64) and run `train(...)` on `model` with them.\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# train_loader_Gaussian = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, train_loader_Gaussian, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 889,
     "status": "ok",
     "timestamp": 1693777717673,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "QxSu2wMBxyYr",
    "outputId": "d9a7483e-9fee-489d-ff99-03dfd36b3656"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000)\n",
    "\n",
    "# Encode the Gaussian noise images. This is inference only, so skip building\n",
    "# the autograd graph, and collect every batch so that no samples are dropped\n",
    "# if the dataset ever spans more than one batch.\n",
    "# NOTE(review): if the model contains dropout/batch-norm layers, confirm that\n",
    "# model.eval() has been called before this cell.\n",
    "latent_batches = []\n",
    "with torch.no_grad():\n",
    "    for batch in test_loader:\n",
    "        img = batch.reshape(-1,1,28,28)\n",
    "        latent_batches.append(model.forward_encoder(img))\n",
    "latent_test_out = torch.cat(latent_batches, dim=0)\n",
    "\n",
    "latent_test_out_Gaussian = latent_test_out.detach().numpy()\n",
    "print(latent_test_out_Gaussian.shape)\n",
    "# Flatten each latent map into a single 4*7*7 = 196 feature vector per image.\n",
    "latent_test_out_Gaussian = latent_test_out_Gaussian.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Gaussian.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4258,
     "status": "ok",
     "timestamp": 1693777721928,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "l24-4OTyxyaj",
    "outputId": "58fffd4c-31b4-4ec1-aff5-52b66d4476c6"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[18 10 10 ...  7  4  1]\n",
      " [18 10 16 ...  7  4  1]\n",
      " [16 10 10 ...  6  4  1]\n",
      " ...\n",
      " [18 11 16 ... 11 16  1]\n",
      " [19 10 10 ...  6  4  1]\n",
      " [18 10 10 ...  7  4  1]]\n",
      "0.31610045691382765\n",
      "0.008086912157154813\n"
     ]
    }
   ],
   "source": [
    "# Map every latent vector through `et.apply` (presumably the leaf index each\n",
    "# tree assigns to each sample, sklearn-style -- confirm where `et` is fitted).\n",
    "leaves_test_out_Gaussian = et.apply(latent_test_out_Gaussian)\n",
    "\n",
    "print(leaves_test_out_Gaussian.shape)\n",
    "print(leaves_test_out_Gaussian)\n",
    "\n",
    "# Pairwise Hamming distance between the leaf vectors of the first 500 samples.\n",
    "distances_test_out_Gaussian = np.zeros((500,500))\n",
    "for row, col in np.ndindex(500, 500):\n",
    "    distances_test_out_Gaussian[row,col] = hamming(\n",
    "        leaves_test_out_Gaussian[row], leaves_test_out_Gaussian[col]\n",
    "    )\n",
    "\n",
    "# Column sums divided by 499: the average distance from each sample to the\n",
    "# other 499 (the diagonal self-distance is assumed to contribute 0).\n",
    "score_test_out_Gaussian = sum(distances_test_out_Gaussian)/499\n",
    "\n",
    "# Mean score, then np.cov of the 1-D score vector (its sample variance).\n",
    "print(np.mean(score_test_out_Gaussian))\n",
    "print(np.cov(score_test_out_Gaussian))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4Zs0-6kFybMM"
   },
   "source": [
    "## Uniform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BHdD_LxEjNmI"
   },
   "outputs": [],
   "source": [
    "# Disabled: reload saved weights into `model` from the mounted Google Drive.\n",
    "# NOTE(review): `path` below is a plain string, not an f-string, so the\n",
    "# `{model_Conv_AE_OOD_MNIST_train_30}` placeholder would not be substituted if\n",
    "# this cell were re-enabled as written -- add the `f` prefix first.\n",
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 316,
     "status": "ok",
     "timestamp": 1693777722223,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "6b5z_lW8xycj",
    "outputId": "9850e054-bb1c-4c52-b181-adf20b553a86"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 784)\n",
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "# Out-of-distribution set: i.i.d. uniform noise images.\n",
    "# Per-pixel lower/upper bounds for the 784 pixels of a flattened 28x28 image.\n",
    "x_min, x_max = [0] * 784, [1] * 784\n",
    "\n",
    "# Sample 10000 flattened images, then rescale them with MinMaxScaler\n",
    "# (presumably sklearn's, i.e. each pixel column mapped to [0, 1] -- confirm import).\n",
    "X_test_out = MinMaxScaler().fit_transform(\n",
    "    np.random.uniform(low=x_min, high=x_max, size=(10000, 784))\n",
    ")\n",
    "print(X_test_out.shape)\n",
    "\n",
    "# Reshape the flat 784-vectors to (N, 1, 28, 28) for the encoder cell below.\n",
    "X_test_out = X_test_out.reshape(-1, 1, 28, 28)\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "X6MhxpC2jQU2"
   },
   "outputs": [],
   "source": [
    "# Disabled experiment: would wrap the uniform noise images in a shuffled\n",
    "# DataLoader (batch size 64) and run `train(...)` on `model` with them.\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# train_loader_Uniform = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, train_loader_Uniform, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 863,
     "status": "ok",
     "timestamp": 1693777723081,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "BG0fLErIxyeZ",
    "outputId": "4d8b5ca1-2d0c-4612-9cf5-21967c7e4d0f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000)\n",
    "\n",
    "# Encode the uniform noise images. This is inference only, so skip building\n",
    "# the autograd graph, and collect every batch so that no samples are dropped\n",
    "# if the dataset ever spans more than one batch.\n",
    "# NOTE(review): if the model contains dropout/batch-norm layers, confirm that\n",
    "# model.eval() has been called before this cell.\n",
    "latent_batches = []\n",
    "with torch.no_grad():\n",
    "    for batch in test_loader:\n",
    "        img = batch.reshape(-1,1,28,28)\n",
    "        latent_batches.append(model.forward_encoder(img))\n",
    "latent_test_out = torch.cat(latent_batches, dim=0)\n",
    "\n",
    "latent_test_out_Uniform = latent_test_out.detach().numpy()\n",
    "print(latent_test_out_Uniform.shape)\n",
    "# Flatten each latent map into a single 4*7*7 = 196 feature vector per image.\n",
    "latent_test_out_Uniform = latent_test_out_Uniform.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Uniform.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4533,
     "status": "ok",
     "timestamp": 1693777727612,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "FvwHsDP5yif8",
    "outputId": "04fc38e4-3053-48d6-d6ba-ab5b9f7dcfd2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[18 10 10 ...  6 16  1]\n",
      " [19 10 13 ...  7 16  1]\n",
      " [19 10 13 ...  6  6  1]\n",
      " ...\n",
      " [16 10 10 ... 11  4  1]\n",
      " [14 10 13 ...  7  4  1]\n",
      " [19 10 10 ...  7  4  1]]\n",
      "0.2750319839679362\n",
      "0.00022465967142336406\n"
     ]
    }
   ],
   "source": [
    "# Map every latent vector through `et.apply` (presumably the leaf index each\n",
    "# tree assigns to each sample, sklearn-style -- confirm where `et` is fitted).\n",
    "leaves_test_out_Uniform = et.apply(latent_test_out_Uniform)\n",
    "\n",
    "print(leaves_test_out_Uniform.shape)\n",
    "print(leaves_test_out_Uniform)\n",
    "\n",
    "# Pairwise Hamming distance between the leaf vectors of the first 500 samples.\n",
    "distances_test_out_Uniform = np.zeros((500,500))\n",
    "for row, col in np.ndindex(500, 500):\n",
    "    distances_test_out_Uniform[row,col] = hamming(\n",
    "        leaves_test_out_Uniform[row], leaves_test_out_Uniform[col]\n",
    "    )\n",
    "\n",
    "# Column sums divided by 499: the average distance from each sample to the\n",
    "# other 499 (the diagonal self-distance is assumed to contribute 0).\n",
    "score_test_out_Uniform = sum(distances_test_out_Uniform)/499\n",
    "\n",
    "# Mean score, then np.cov of the 1-D score vector (its sample variance).\n",
    "print(np.mean(score_test_out_Uniform))\n",
    "print(np.cov(score_test_out_Uniform))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "N_CKgEZ1yiju"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nxm_k1Noyil7"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "FZGej4gIyhMD"
   },
   "source": [
    "# Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 8,
     "status": "ok",
     "timestamp": 1693777728070,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "xMKKU1PywASB",
    "outputId": "b8689507-c09d-443c-dc92-e2f2e879e5ec"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "1.0\n",
      "0.996\n",
      "0.9999999999999999\n",
      "0.999636\n",
      "0.989048\n",
      "0.996296\n",
      "1.0\n",
      "1.0\n",
      "0.9781839999999999\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "# Ground truth for every comparison: the first 500 entries are labelled 1 and\n",
    "# the last 500 are labelled 0, matching the [in-distribution, OOD] order of the\n",
    "# concatenations below (assumes each score vector has 500 entries).\n",
    "score_true = np.concatenate((np.ones(500), np.zeros(500)))\n",
    "\n",
    "# In-distribution scores followed by the scores of one OOD dataset each.\n",
    "score_pred_Breast = np.concatenate((score_test_in, score_test_out_Breast))\n",
    "score_pred_Chest = np.concatenate((score_test_in, score_test_out_Chest))\n",
    "score_pred_Oct = np.concatenate((score_test_in, score_test_out_Oct))\n",
    "score_pred_Organa = np.concatenate((score_test_in, score_test_out_Organa))\n",
    "score_pred_Organc = np.concatenate((score_test_in, score_test_out_Organc))\n",
    "score_pred_Organs = np.concatenate((score_test_in, score_test_out_Organs))\n",
    "score_pred_Pneum = np.concatenate((score_test_in, score_test_out_Pneum))\n",
    "score_pred_Tissue = np.concatenate((score_test_in, score_test_out_Tissue))\n",
    "score_pred_KMNIST = np.concatenate((score_test_in, score_test_out_KMNIST))\n",
    "score_pred_QMNIST = np.concatenate((score_test_in, score_test_out_QMNIST))\n",
    "score_pred_Omniglot = np.concatenate((score_test_in, score_test_out_Omniglot))\n",
    "score_pred_Cifar10bw = np.concatenate((score_test_in, score_test_out_Cifar10bw))\n",
    "score_pred_NotMNIST = np.concatenate((score_test_in, score_test_out_NotMNIST))\n",
    "score_pred_Gaussian = np.concatenate((score_test_in, score_test_out_Gaussian))\n",
    "score_pred_Uniform = np.concatenate((score_test_in, score_test_out_Uniform))\n",
    "\n",
    "# AUROC per OOD dataset, printed in the same order as the definitions above.\n",
    "for score_pred in (\n",
    "    score_pred_Breast,\n",
    "    score_pred_Chest,\n",
    "    score_pred_Oct,\n",
    "    score_pred_Organa,\n",
    "    score_pred_Organc,\n",
    "    score_pred_Organs,\n",
    "    score_pred_Pneum,\n",
    "    score_pred_Tissue,\n",
    "    score_pred_KMNIST,\n",
    "    score_pred_QMNIST,\n",
    "    score_pred_Omniglot,\n",
    "    score_pred_Cifar10bw,\n",
    "    score_pred_NotMNIST,\n",
    "    score_pred_Gaussian,\n",
    "    score_pred_Uniform,\n",
    "):\n",
    "    print(roc_auc_score(score_true, score_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 8,
     "status": "ok",
     "timestamp": 1693777728072,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "77iJeZtfwYyj",
    "outputId": "4f92e7c8-0c06-4418-df99-a5087a3d5d3f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "1.0\n",
      "0.9778167381843568\n",
      "1.0\n",
      "0.9996173484462874\n",
      "0.954527017759569\n",
      "0.9836590985682699\n",
      "1.0\n",
      "1.0\n",
      "0.9356535167182124\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "# Precision-recall curve per OOD dataset (label 1 = in-distribution, as set up\n",
    "# in `score_true`). All three outputs follow the `<metric>_<Dataset>` naming.\n",
    "precision_Breast, recall_Breast, thresholds_Breast = precision_recall_curve(score_true, score_pred_Breast)\n",
    "precision_Chest, recall_Chest, thresholds_Chest = precision_recall_curve(score_true, score_pred_Chest)\n",
    "precision_Oct, recall_Oct, thresholds_Oct = precision_recall_curve(score_true, score_pred_Oct)\n",
    "precision_Organa, recall_Organa, thresholds_Organa = precision_recall_curve(score_true, score_pred_Organa)\n",
    "precision_Organc, recall_Organc, thresholds_Organc = precision_recall_curve(score_true, score_pred_Organc)\n",
    "precision_Organs, recall_Organs, thresholds_Organs = precision_recall_curve(score_true, score_pred_Organs)\n",
    "precision_Pneum, recall_Pneum, thresholds_Pneum = precision_recall_curve(score_true, score_pred_Pneum)\n",
    "precision_Tissue, recall_Tissue, thresholds_Tissue = precision_recall_curve(score_true, score_pred_Tissue)\n",
    "precision_KMNIST, recall_KMNIST, thresholds_KMNIST = precision_recall_curve(score_true, score_pred_KMNIST)\n",
    "precision_QMNIST, recall_QMNIST, thresholds_QMNIST = precision_recall_curve(score_true, score_pred_QMNIST)\n",
    "precision_Omniglot, recall_Omniglot, thresholds_Omniglot = precision_recall_curve(score_true, score_pred_Omniglot)\n",
    "# Was `threshold_Cifar10bw`; renamed to match the `thresholds_*` convention.\n",
    "precision_Cifar10bw, recall_Cifar10bw, thresholds_Cifar10bw = precision_recall_curve(score_true, score_pred_Cifar10bw)\n",
    "precision_NotMNIST, recall_NotMNIST, thresholds_NotMNIST = precision_recall_curve(score_true, score_pred_NotMNIST)\n",
    "precision_Gaussian, recall_Gaussian, thresholds_Gaussian = precision_recall_curve(score_true, score_pred_Gaussian)\n",
    "precision_Uniform, recall_Uniform, thresholds_Uniform = precision_recall_curve(score_true, score_pred_Uniform)\n",
    "\n",
    "# Area under each precision-recall curve (recall on x, precision on y).\n",
    "auc_precision_recall_Breast = auc(recall_Breast, precision_Breast)\n",
    "auc_precision_recall_Chest = auc(recall_Chest, precision_Chest)\n",
    "auc_precision_recall_Oct = auc(recall_Oct, precision_Oct)\n",
    "auc_precision_recall_Organa = auc(recall_Organa, precision_Organa)\n",
    "auc_precision_recall_Organc = auc(recall_Organc, precision_Organc)\n",
    "auc_precision_recall_Organs = auc(recall_Organs, precision_Organs)\n",
    "auc_precision_recall_Pneum = auc(recall_Pneum, precision_Pneum)\n",
    "auc_precision_recall_Tissue = auc(recall_Tissue, precision_Tissue)\n",
    "auc_precision_recall_KMNIST = auc(recall_KMNIST, precision_KMNIST)\n",
    "auc_precision_recall_QMNIST = auc(recall_QMNIST, precision_QMNIST)\n",
    "auc_precision_recall_Omniglot = auc(recall_Omniglot, precision_Omniglot)\n",
    "auc_precision_recall_Cifar10bw = auc(recall_Cifar10bw, precision_Cifar10bw)\n",
    "auc_precision_recall_NotMNIST = auc(recall_NotMNIST, precision_NotMNIST)\n",
    "auc_precision_recall_Gaussian = auc(recall_Gaussian, precision_Gaussian)\n",
    "auc_precision_recall_Uniform = auc(recall_Uniform, precision_Uniform)\n",
    "\n",
    "# Print in the same dataset order as the definitions above.\n",
    "for auc_precision_recall in (\n",
    "    auc_precision_recall_Breast,\n",
    "    auc_precision_recall_Chest,\n",
    "    auc_precision_recall_Oct,\n",
    "    auc_precision_recall_Organa,\n",
    "    auc_precision_recall_Organc,\n",
    "    auc_precision_recall_Organs,\n",
    "    auc_precision_recall_Pneum,\n",
    "    auc_precision_recall_Tissue,\n",
    "    auc_precision_recall_KMNIST,\n",
    "    auc_precision_recall_QMNIST,\n",
    "    auc_precision_recall_Omniglot,\n",
    "    auc_precision_recall_Cifar10bw,\n",
    "    auc_precision_recall_NotMNIST,\n",
    "    auc_precision_recall_Gaussian,\n",
    "    auc_precision_recall_Uniform,\n",
    "):\n",
    "    print(auc_precision_recall)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6,
     "status": "ok",
     "timestamp": 1693777728072,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "IW5FtpbMWrGf",
    "outputId": "f0f24e5b-dd9f-41ca-d6fd-ff6d35700bd4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0\n",
      "0.0\n",
      "0.004\n",
      "0.0\n",
      "0.002\n",
      "0.012\n",
      "0.004\n",
      "0.0\n",
      "0.0\n",
      "0.042\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n"
     ]
    }
   ],
   "source": [
    "def compute_fpr95(y_true, y_pred_probs):\n",
    "    \"\"\"Return the false-positive rate at the first ROC point whose TPR is >= 0.95.\n",
    "\n",
    "    Args:\n",
    "        y_true: binary ground-truth labels, passed to `roc_curve` unchanged.\n",
    "        y_pred_probs: scores for the positive class (higher = more positive).\n",
    "\n",
    "    Returns:\n",
    "        The FPR of the lowest-threshold-count operating point that reaches at\n",
    "        least 95% TPR, i.e. the usual FPR@95%TPR metric.\n",
    "    \"\"\"\n",
    "    # Keep every ROC point: with the default drop_intermediate=True the point\n",
    "    # where TPR first reaches 0.95 may have been pruned from the curve.\n",
    "    fpr, tpr, thresholds = sklearn.metrics.roc_curve(\n",
    "        y_true, y_pred_probs, drop_intermediate=False\n",
    "    )\n",
    "    # tpr is non-decreasing, so the first index with TPR >= 0.95 is the\n",
    "    # operating point with the smallest FPR that still meets the target.\n",
    "    # (Picking the point *nearest* to 0.95 could select one below the target\n",
    "    # and under-report the FPR.)\n",
    "    idx = np.argmax(tpr >= 0.95)\n",
    "    fpr95 = fpr[idx]\n",
    "    return fpr95\n",
    "\n",
    "# FPR at 95% TPR for each OOD dataset, using the same `score_true` labels and\n",
    "# `score_pred_*` vectors as the AUROC / PR-AUC cells above.\n",
    "fpr95_score_Breast = compute_fpr95(score_true, score_pred_Breast)\n",
    "fpr95_score_Chest = compute_fpr95(score_true, score_pred_Chest)\n",
    "fpr95_score_Oct = compute_fpr95(score_true, score_pred_Oct)\n",
    "fpr95_score_Organa = compute_fpr95(score_true, score_pred_Organa)\n",
    "fpr95_score_Organc = compute_fpr95(score_true, score_pred_Organc)\n",
    "fpr95_score_Organs = compute_fpr95(score_true, score_pred_Organs)\n",
    "fpr95_score_Pneum = compute_fpr95(score_true, score_pred_Pneum)\n",
    "fpr95_score_Tissue = compute_fpr95(score_true, score_pred_Tissue)\n",
    "fpr95_score_KMNIST = compute_fpr95(score_true, score_pred_KMNIST)\n",
    "fpr95_score_QMNIST = compute_fpr95(score_true, score_pred_QMNIST)\n",
    "fpr95_score_Omniglot = compute_fpr95(score_true, score_pred_Omniglot)\n",
    "fpr95_score_Cifar10bw = compute_fpr95(score_true, score_pred_Cifar10bw)\n",
    "fpr95_score_NotMNIST = compute_fpr95(score_true, score_pred_NotMNIST)\n",
    "fpr95_score_Gaussian = compute_fpr95(score_true, score_pred_Gaussian)\n",
    "fpr95_score_Uniform = compute_fpr95(score_true, score_pred_Uniform)\n",
    "\n",
    "# Print in the same dataset order as the AUROC and PR-AUC cells\n",
    "# (..., Cifar10bw, NotMNIST, Gaussian, Uniform) so the unlabeled rows line up.\n",
    "print(fpr95_score_Breast)\n",
    "print(fpr95_score_Chest)\n",
    "print(fpr95_score_Oct)\n",
    "print(fpr95_score_Organa)\n",
    "print(fpr95_score_Organc)\n",
    "print(fpr95_score_Organs)\n",
    "print(fpr95_score_Pneum)\n",
    "print(fpr95_score_Tissue)\n",
    "print(fpr95_score_KMNIST)\n",
    "print(fpr95_score_QMNIST)\n",
    "print(fpr95_score_Omniglot)\n",
    "print(fpr95_score_Cifar10bw)\n",
    "print(fpr95_score_NotMNIST)\n",
    "print(fpr95_score_Gaussian)\n",
    "print(fpr95_score_Uniform)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "gZKuFE86Ey0O"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 232,
     "status": "ok",
     "timestamp": 1693777728299,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "9Lltqc9pEy3V",
    "outputId": "b52d80c6-6e3e-45ba-adb8-25e551f964e8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8931809699398802 3.362444956734138e-05\n",
      "0.6914437194388777 0.002269430331407682\n",
      "0.457059254509018 0.003879313033436223\n",
      "0.4444889298597194 0.004354740913215967\n",
      "0.4147494188376754 0.005415355122011038\n",
      "0.622519246492986 0.007519940942672991\n",
      "0.6102598957915831 0.007140277156151874\n",
      "0.6127244248496994 0.0073032816240303725\n",
      "0.3365375070140281 0.0037587742533124052\n",
      "0.4205789659318637 0.004957656652298349\n",
      "0.8213646012024048 0.0012461203126889625\n",
      "0.6896508376753506 0.002805395450891807\n",
      "0.20521898196392788 0.0007923738987085201\n",
      "0.5091977875751503 0.004230844138739862\n",
      "0.41262090581162325 0.0036183933604043463\n",
      "0.31610045691382765 0.008086912157154813\n",
      "0.2750319839679362 0.00022465967142336406\n"
     ]
    }
   ],
   "source": [
    "# Mean and np.cov (sample variance of the 1-D vector) of each score set:\n",
    "# the in-distribution scores first, then every OOD dataset in the usual order.\n",
    "for scores in (\n",
    "    score_test_in,\n",
    "    score_test_out_Breast,\n",
    "    score_test_out_Chest,\n",
    "    score_test_out_Oct,\n",
    "    score_test_out_Organa,\n",
    "    score_test_out_Organc,\n",
    "    score_test_out_Organs,\n",
    "    score_test_out_Pneum,\n",
    "    score_test_out_Tissue,\n",
    "    score_test_out_KMNIST,\n",
    "    score_test_out_QMNIST,\n",
    "    score_test_out_Omniglot,\n",
    "    score_test_out_Cifar10bw,\n",
    "    score_test_out_NotMNIST,\n",
    "    score_test_out_Gaussian,\n",
    "    score_test_out_Uniform,\n",
    "):\n",
    "    print(np.mean(scores), np.cov(scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BQQOs20XAERs"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-WxPZ-_CcFst"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "qyjuDTOXQ4J3"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyNNERBKHIMx9Bt1acZ/IXj7",
   "gpuType": "V100",
   "machine_shape": "hm",
   "provenance": [
    {
     "file_id": "1Fp5qny4ygd5s_dco0cnOGx4MohZmAEiT",
     "timestamp": 1690739976595
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
