{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 23203,
     "status": "ok",
     "timestamp": 1694024170763,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "Au3Fcvf9ONNZ",
    "outputId": "a6211f93-4dd4-4bc4-8b2a-bb9c90578fcc"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mounted at /content/gdrive\n"
     ]
    }
   ],
   "source": [
    "# Mount Google Drive at /content/gdrive so Drive-hosted files can be read from this runtime.\n",
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6105,
     "status": "ok",
     "timestamp": 1694024176865,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "cOzitmuHEa-L",
    "outputId": "bfaa2962-0cac-4956-bd50-54828bb0957e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting PIMS\n",
      "  Downloading PIMS-0.6.1.tar.gz (86 kB)\n",
      "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r",
      "\u001b[2K     \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━\u001b[0m \u001b[32m81.9/86.0 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from PIMS) (2.31.1)\n",
      "Requirement already satisfied: numpy>=1.19 in /usr/local/lib/python3.10/dist-packages (from PIMS) (1.23.5)\n",
      "Collecting slicerator>=0.9.8 (from PIMS)\n",
      "  Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n",
      "Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.10/dist-packages (from imageio->PIMS) (9.4.0)\n",
      "Building wheels for collected packages: PIMS\n",
      "  Building wheel for PIMS (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
      "  Created wheel for PIMS: filename=PIMS-0.6.1-py3-none-any.whl size=82615 sha256=96b20fd335e0b9f57ffd3ca9d1e703044b8dbcb10da9a176d0a29b3b6ddad3a0\n",
      "  Stored in directory: /root/.cache/pip/wheels/cc/bf/3e/bfa77232d942f8244145f9c713b6b38f6ef04b6fb5c021c114\n",
      "Successfully built PIMS\n",
      "Installing collected packages: slicerator, PIMS\n",
      "Successfully installed PIMS-0.6.1 slicerator-1.1.0\n"
     ]
    }
   ],
   "source": [
    "# Explicit %pip magic: installs into the running kernel's environment and does not rely on automagic.\n",
    "%pip install PIMS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 6070,
     "status": "ok",
     "timestamp": 1694024190535,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "9WyZRdVSFsvW",
    "outputId": "ddc48267-ed55-4ec4-ee7e-d6c865c4da34"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting fastcluster\n",
      "  Downloading fastcluster-1.2.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
      "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/194.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r",
      "\u001b[2K     \u001b[91m━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.2/194.0 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.0/194.0 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: numpy>=1.9 in /usr/local/lib/python3.10/dist-packages (from fastcluster) (1.23.5)\n",
      "Installing collected packages: fastcluster\n",
      "Successfully installed fastcluster-1.2.6\n"
     ]
    }
   ],
   "source": [
    "# Explicit %pip magic: installs into the running kernel's environment and does not rely on automagic.\n",
    "%pip install fastcluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "executionInfo": {
     "elapsed": 6273,
     "status": "ok",
     "timestamp": 1694024196804,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "UFN8FbHYOWnl"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "from torchvision import datasets\n",
    "import torchvision.transforms as transforms\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import torchvision\n",
    "import torch\n",
    "from torchvision import transforms\n",
    "\n",
    "# Default figure appearance: white axes background and a 9 x 6 figure size.\n",
    "plt.rcParams.update({'axes.facecolor': 'white', 'figure.figsize': (9, 6)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "executionInfo": {
     "elapsed": 22290,
     "status": "ok",
     "timestamp": 1694024219092,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "CKgWca6rEXRj"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "# importing relevant libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy as sp\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
    "from sklearn.model_selection import cross_val_predict, StratifiedKFold\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, auc#plot_precision_recall_curve\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from tqdm import tqdm\n",
    "from umap import UMAP\n",
    "from pynndescent import NNDescent\n",
    "from fastcluster import single\n",
    "from scipy.cluster.hierarchy import cut_tree, fcluster, dendrogram\n",
    "from scipy.spatial.distance import squareform\n",
    "from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier\n",
    "from pims import ImageSequence\n",
    "from PIL import Image\n",
    "from scipy.spatial.distance import hamming\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# setting the matplotlib plot style\n",
    "plt.style.use('bmh')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 22,
     "status": "ok",
     "timestamp": 1693776873252,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "8iNq7_xZ6WJK",
    "outputId": "4fc7408d-6def-47d1-ebde-0b641e0d5362"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cpu\n"
     ]
    }
   ],
   "source": [
    "# Pick the compute device: CUDA when a GPU is visible to torch, otherwise the CPU.\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "print(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HrjCzoe8OWqH"
   },
   "outputs": [],
   "source": [
    "# define the NN architecture\n",
    "class ConvAutoencoder(nn.Module):\n",
    "    \"\"\"Convolutional autoencoder for single-channel images.\n",
    "\n",
    "    Encoder: conv(1->16) + ReLU + 2x2 max-pool, then conv(16->4) + ReLU +\n",
    "    2x2 max-pool. Decoder: two stride-2 transposed convolutions (4->16 with\n",
    "    ReLU, then 16->1 with sigmoid), each doubling height and width.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        ## encoder layers ##\n",
    "        # conv layer (depth from 1 --> 16), 3x3 kernels\n",
    "        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)\n",
    "        # conv layer (depth from 16 --> 4), 3x3 kernels\n",
    "        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)\n",
    "        # pooling layer to reduce x-y dims by two; kernel and stride of 2\n",
    "        self.pool = nn.MaxPool2d(2, 2)\n",
    "\n",
    "        ## decoder layers ##\n",
    "        ## a kernel of 2 and a stride of 2 will increase the spatial dims by 2\n",
    "        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)\n",
    "        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)\n",
    "\n",
    "    def forward_encoder(self, x):\n",
    "        \"\"\"Return the compressed representation of `x`.\n",
    "\n",
    "        Args:\n",
    "            x: tensor of shape (N, 1, H, W).\n",
    "\n",
    "        Returns:\n",
    "            Tensor of shape (N, 4, H // 4, W // 4).\n",
    "        \"\"\"\n",
    "        # first hidden layer: conv + relu, then max-pooling\n",
    "        x = self.pool(F.relu(self.conv1(x)))\n",
    "        # second hidden layer; its pooled output is the compressed representation\n",
    "        return self.pool(F.relu(self.conv2(x)))\n",
    "\n",
    "    def forward_decoder(self, z):\n",
    "        \"\"\"Reconstruct an image from a latent tensor produced by `forward_encoder`.\n",
    "\n",
    "        Args:\n",
    "            z: tensor of shape (N, 4, h, w).\n",
    "\n",
    "        Returns:\n",
    "            Tensor of shape (N, 1, 4 * h, 4 * w) with values in [0, 1].\n",
    "        \"\"\"\n",
    "        z = F.relu(self.t_conv1(z))\n",
    "        # output layer (with sigmoid for scaling from 0 to 1)\n",
    "        return torch.sigmoid(self.t_conv2(z))\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Encode `x` and decode it again; returns the reconstruction.\n",
    "\n",
    "        Reuses `forward_encoder` so the latent code used for reconstruction is\n",
    "        always the same one that `forward_encoder` exposes to callers.\n",
    "        \"\"\"\n",
    "        return self.forward_decoder(self.forward_encoder(x))\n",
    "\n",
    "\n",
    "# Training function\n",
    "def train(model, train_loader, criterion, optimizer, num_epochs=10, online = False):\n",
    "    \"\"\"Train `model` as an autoencoder and print the mean batch loss after every epoch.\n",
    "\n",
    "    Args:\n",
    "        model: network called as `model(inputs)`; it is put in training mode.\n",
    "        train_loader: iterable of batches. With `online=False` every batch is the\n",
    "            image tensor itself; with `online=True` every batch is a pair whose\n",
    "            first item is the image tensor (for example `(images, labels)`).\n",
    "        criterion: loss called as `criterion(outputs, images)`.\n",
    "        optimizer: optimizer stepping the parameters of `model`.\n",
    "        num_epochs: number of passes over `train_loader`.\n",
    "        online: selects the batch layout described above.\n",
    "    \"\"\"\n",
    "    # Build the grayscale transform once instead of once per batch.\n",
    "    to_gray = torchvision.transforms.Grayscale(num_output_channels=1)\n",
    "    model.train()\n",
    "    for epoch in range(num_epochs):\n",
    "        running_loss = 0.0\n",
    "        num_batches = 0\n",
    "        for batch in train_loader:\n",
    "            if online:\n",
    "                images, _ = batch\n",
    "            else:\n",
    "                images = batch\n",
    "            inputs = to_gray(images)\n",
    "            optimizer.zero_grad()\n",
    "            outputs = model(inputs)\n",
    "            # NOTE(review): the target is the untransformed batch, as in the original\n",
    "            # code; confirm this is intended if multi-channel images are ever passed.\n",
    "            loss = criterion(outputs, images)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            running_loss += loss.item()\n",
    "            num_batches += 1\n",
    "        # Batches are counted while iterating, so an empty loader (or one without\n",
    "        # __len__) reports nan instead of raising.\n",
    "        mean_loss = running_loss / num_batches if num_batches else float('nan')\n",
    "        print(f\"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss}\")\n",
    "\n",
    "# Data preprocessing: the only step is conversion to a tensor\n",
    "transform = transforms.Compose([transforms.ToTensor()])\n",
    "\n",
    "# Autoencoder instance, MSE reconstruction loss and Adam optimizer\n",
    "model = ConvAutoencoder()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 526604,
     "status": "ok",
     "timestamp": 1693777399848,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "Om9z7Bi4O-sW",
    "outputId": "f97a0d0c-0489-44bc-ae77-04fd0a1a48d7"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [1/30], Loss: 0.05354074912387997\n",
      "Epoch [2/30], Loss: 0.02353846527405703\n",
      "Epoch [3/30], Loss: 0.019709812828710975\n",
      "Epoch [4/30], Loss: 0.017938856980693873\n",
      "Epoch [5/30], Loss: 0.016658776267957903\n",
      "Epoch [6/30], Loss: 0.01576462466674827\n",
      "Epoch [7/30], Loss: 0.01515678797604274\n",
      "Epoch [8/30], Loss: 0.014196194339988392\n",
      "Epoch [9/30], Loss: 0.011834625582665459\n",
      "Epoch [10/30], Loss: 0.011265794512257774\n",
      "Epoch [11/30], Loss: 0.010941788693194959\n",
      "Epoch [12/30], Loss: 0.010702029917079376\n",
      "Epoch [13/30], Loss: 0.010497058968125248\n",
      "Epoch [14/30], Loss: 0.010335392355402586\n",
      "Epoch [15/30], Loss: 0.010199229802880714\n",
      "Epoch [16/30], Loss: 0.010086894666811804\n",
      "Epoch [17/30], Loss: 0.009999012351155218\n",
      "Epoch [18/30], Loss: 0.0099247941143636\n",
      "Epoch [19/30], Loss: 0.009862085450877514\n",
      "Epoch [20/30], Loss: 0.009808675355232282\n",
      "Epoch [21/30], Loss: 0.009761888505037088\n",
      "Epoch [22/30], Loss: 0.00971129397923218\n",
      "Epoch [23/30], Loss: 0.009668383121824086\n",
      "Epoch [24/30], Loss: 0.009628672316583044\n",
      "Epoch [25/30], Loss: 0.009597304516406393\n",
      "Epoch [26/30], Loss: 0.00956970690412006\n",
      "Epoch [27/30], Loss: 0.009538259906277283\n",
      "Epoch [28/30], Loss: 0.009512355380844491\n",
      "Epoch [29/30], Loss: 0.009488214658740075\n",
      "Epoch [30/30], Loss: 0.009467175576899415\n"
     ]
    }
   ],
   "source": [
    "# Load the MNIST training data\n",
    "train_dataset = torchvision.datasets.MNIST(root=\".MNIST/train\", train=True, transform=transform, download=True)\n",
    "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)\n",
    "\n",
    "# Training the model\n",
    "train(model, train_loader, criterion, optimizer, num_epochs=30, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 24525,
     "status": "ok",
     "timestamp": 1693777424335,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "bh3Zu2QWO-uq",
    "outputId": "ce8b03cc-c4a7-4ab3-e5c6-cb53da4a7466"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'torchvision.datasets.mnist.MNIST'>\n",
      "[3 4 6 ... 9 6 8]\n",
      "(60000, 1, 28, 28)\n",
      "(60000, 4, 7, 7)\n",
      "(60000,)\n"
     ]
    }
   ],
   "source": [
    "train_dataset = torchvision.datasets.MNIST(root=\".MNIST/train\", train=True, transform=transform, download=True)\n",
    "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 60000, shuffle=True)\n",
    "\n",
    "print(type(train_dataset))\n",
    "\n",
    "# Inference only: switch to eval mode and disable autograd so no graph is\n",
    "# recorded for the 60000-image batch.\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    for batch in train_loader:\n",
    "        img, labels = batch\n",
    "\n",
    "        # Generating output\n",
    "        out = model.forward(img)\n",
    "        latent_train = model.forward_encoder(img)\n",
    "\n",
    "# Values kept below come from the last batch yielded by the loader.\n",
    "X_train = img.detach().numpy()\n",
    "latent_train = latent_train.detach().numpy()\n",
    "y = labels.detach().numpy()\n",
    "print(y)\n",
    "print(X_train.shape)\n",
    "print(latent_train.shape)\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1061,
     "status": "ok",
     "timestamp": 1693777425394,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "ssO_Le6NP6HZ",
    "outputId": "f45fea22-c251-4797-9207-b5d7be89eae2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(60000, 784)\n",
      "(60000, 196)\n",
      "0.0018747908346718198\n",
      "0.010904943518778552\n"
     ]
    }
   ],
   "source": [
    "# Flatten images and latent codes to one row per sample.\n",
    "X_train = X_train.reshape(-1, 28 * 28)\n",
    "latent_train = latent_train.reshape(-1, 4 * 7 * 7)\n",
    "for flat in (X_train, latent_train):\n",
    "    print(flat.shape)\n",
    "# Mean entry of the covariance matrix computed on the transposed data.\n",
    "for flat in (X_train, latent_train):\n",
    "    print(np.mean(np.cov(flat.T)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fw90altp9HQr"
   },
   "outputs": [],
   "source": [
    "# y = y[:2400]\n",
    "# latent_train = latent_train[:2400,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 18,
     "status": "ok",
     "timestamp": 1693777482529,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "W_x-D1GZD-XN",
    "outputId": "32f3f091-ae55-4295-e90e-1658a8628e45"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[7 9 0 ... 2 0 7]\n"
     ]
    }
   ],
   "source": [
    "# np.random.shuffle(y)\n",
    "# print(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "k3hp5R2cQCD9"
   },
   "source": [
    "# Tree Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 8190,
     "status": "ok",
     "timestamp": 1693777490706,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "R3tnvjyuQKpq",
    "outputId": "7a3f2a78-db3d-4f83-c504-7d240d4c77a6"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Area under the ROC Curve: 0.5109056745187117\n"
     ]
    }
   ],
   "source": [
    "# Extra-trees forest on the latent codes.\n",
    "# (RandomForestClassifier takes the same keyword arguments and can be swapped in here.)\n",
    "et = ExtraTreesClassifier(\n",
    "    n_estimators=500,\n",
    "    min_samples_leaf=100,\n",
    "    max_features='sqrt',\n",
    "    bootstrap=True,\n",
    "    class_weight='balanced',\n",
    "    n_jobs=-1,\n",
    ")\n",
    "\n",
    "# 5-fold stratified splitter with shuffling\n",
    "skf = StratifiedKFold(n_splits=5, shuffle=True)\n",
    "\n",
    "# out-of-fold class probabilities for every training sample\n",
    "preds = cross_val_predict(et, latent_train, y, cv=skf, method='predict_proba')\n",
    "\n",
    "# one-vs-one multiclass ROC AUC of those probabilities\n",
    "print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 92
    },
    "executionInfo": {
     "elapsed": 1474,
     "status": "ok",
     "timestamp": 1693777492156,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "T2zHZflUQlOr",
    "outputId": "ee93fb4a-22da-490c-a47a-53b0e73f8a99"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-20 {color: black;background-color: white;}#sk-container-id-20 pre{padding: 0;}#sk-container-id-20 div.sk-toggleable {background-color: white;}#sk-container-id-20 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-20 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-20 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-20 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-20 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-20 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-20 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-20 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-20 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-20 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-20 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-20 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-20 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-20 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-20 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-20 div.sk-item {position: relative;z-index: 1;}#sk-container-id-20 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-20 div.sk-item::before, #sk-container-id-20 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-20 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-20 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-20 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-20 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-20 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-20 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-20 div.sk-label-container {text-align: center;}#sk-container-id-20 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-20 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-20\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" checked><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ExtraTreesClassifier</label><div class=\"sk-toggleable__content\"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
       "                     min_samples_leaf=100, n_estimators=500, n_jobs=-1)"
      ]
     },
     "execution_count": 1917,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the forest on the full latent training set.\n",
    "et.fit(X=latent_train, y=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3861,
     "status": "ok",
     "timestamp": 1693777496014,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "6tMnYzZ_VPZR",
    "outputId": "65b8addd-bccb-4342-cc50-f997fbb336a7"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2400, 500)\n",
      "[[ 2 12 14 ... 16  1  9]\n",
      " [13  3 16 ... 15  8 13]\n",
      " [ 4  3 17 ... 12  3  9]\n",
      " ...\n",
      " [19  3 10 ... 13  3  4]\n",
      " [13 14  3 ... 16 12  4]\n",
      " [ 9 12  3 ... 15  1 13]]\n",
      "0.8722389579158316\n",
      "5.963223876437905e-05\n"
     ]
    }
   ],
   "source": [
    "leaves_train = et.apply(latent_train)\n",
    "print(leaves_train.shape)\n",
    "print(leaves_train)\n",
    "\n",
    "# Pairwise Hamming distances between the leaf-index vectors of the first\n",
    "# n_ref samples (at most 500). Each row is computed with one vectorized\n",
    "# comparison instead of n_ref separate scipy `hamming` calls.\n",
    "n_ref = min(500, len(leaves_train))\n",
    "ref_leaves = leaves_train[:n_ref]\n",
    "distances_train = np.zeros((n_ref, n_ref))\n",
    "for i in range(n_ref):\n",
    "    # fraction of trees in which sample i and each other sample land in different leaves\n",
    "    distances_train[i] = (ref_leaves != ref_leaves[i]).mean(axis=1)\n",
    "\n",
    "# Average distance of every sample to the other n_ref - 1 samples\n",
    "# (the zero self-distance is part of the sum but not of the divisor).\n",
    "score_train = distances_train.sum(axis=0) / max(n_ref - 1, 1)\n",
    "\n",
    "print(np.mean(score_train))\n",
    "print(np.cov(score_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Q55SNuDNhQQ1"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# torch.save(model.state_dict(), path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0hpY3jsdhQUb"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "U6SnpaEVWOQf"
   },
   "source": [
    "# Testing on ID Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "R6olj5rRmTpo"
   },
   "outputs": [],
   "source": [
    "#num_epoch = 30"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BLgLZ7STVPbU"
   },
   "outputs": [],
   "source": [
    "# test_dataset_MNIST = torchvision.datasets.MNIST(root=\".MNIST/test\", train=False, transform=transform, download=True)\n",
    "# test_loader_MNIST = torch.utils.data.DataLoader(test_dataset_MNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_MNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3413,
     "status": "ok",
     "timestamp": 1693777499404,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "zM65LrAhVPda",
    "outputId": "71f8e2e9-767c-46d9-bc98-4902fb6b6b27"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 1, 28, 28)\n",
      "(10000, 4, 7, 7)\n",
      "(10000, 784)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "test_dataset_MNIST = torchvision.datasets.MNIST(root=\".MNIST/test\", train=False, transform=transform, download=True)\n",
    "test_loader_MNIST = torch.utils.data.DataLoader(test_dataset_MNIST, batch_size = 10000, shuffle=True)\n",
    "\n",
    "# Inference only: switch to eval mode and disable autograd so no graph is\n",
    "# recorded for the 10000-image batch.\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    for batch in test_loader_MNIST:\n",
    "        img, _ = batch\n",
    "\n",
    "        # Generating output\n",
    "        out = model.forward(img)\n",
    "        latent_test = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "# Values kept below come from the last batch yielded by the loader.\n",
    "latent_test = latent_test.detach().numpy()\n",
    "X_test = img.detach().numpy()\n",
    "print(X_test.shape)\n",
    "print(latent_test.shape)\n",
    "X_test = X_test.reshape(-1,1*28*28)\n",
    "latent_test = latent_test.reshape(-1,4*7*7)\n",
    "print(X_test.shape)\n",
    "print(latent_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3996,
     "status": "ok",
     "timestamp": 1693777503377,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "5l77BQBXVPfP",
    "outputId": "a74e0514-f3d8-461b-df51-cca166e983ff"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[13 13 14 ...  2 12 11]\n",
      " [16  4 16 ... 12  3 13]\n",
      " [16 12 10 ... 11  3  4]\n",
      " ...\n",
      " [20  8 17 ...  8 12  9]\n",
      " [19  9  9 ...  8  8  6]\n",
      " [ 2 12 11 ...  2  6 14]]\n",
      "0.871146869739479\n",
      "6.698297940476351e-05\n"
     ]
    }
   ],
   "source": [
    "latent_test_in = latent_test\n",
    "\n",
    "leaves_test_in = et.apply(latent_test_in)\n",
    "print(leaves_test_in.shape)\n",
    "print(leaves_test_in)\n",
    "\n",
    "# Pairwise Hamming distances between the leaf-index vectors of the first\n",
    "# n_ref samples (at most 500). Each row is computed with one vectorized\n",
    "# comparison instead of n_ref separate scipy `hamming` calls.\n",
    "n_ref = min(500, len(leaves_test_in))\n",
    "ref_leaves = leaves_test_in[:n_ref]\n",
    "distances_test_in = np.zeros((n_ref, n_ref))\n",
    "for i in range(n_ref):\n",
    "    # fraction of trees in which sample i and each other sample land in different leaves\n",
    "    distances_test_in[i] = (ref_leaves != ref_leaves[i]).mean(axis=1)\n",
    "\n",
    "# Average distance of every sample to the other n_ref - 1 samples\n",
    "# (the zero self-distance is part of the sum but not of the divisor).\n",
    "score_test_in = distances_test_in.sum(axis=0) / max(n_ref - 1, 1)\n",
    "print(np.mean(score_test_in))\n",
    "print(np.cov(score_test_in))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zKHeKNSXWZQr"
   },
   "source": [
    "# Testing on OOD Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "DQgAh1gaXxP6"
   },
   "source": [
    "## FashionMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DTO-dHR3hkcS"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 995,
     "status": "ok",
     "timestamp": 1693777504350,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "U6-VmvGdWLPA",
    "outputId": "af422d89-b799-4b88-9185-b317aa2fb39c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "df_test_out = pd.read_csv('/content/gdrive/MyDrive/TOOD/datasets/fashion-mnist_test.csv')\n",
    "#df_test_out = pd.read_csv('/content/gdrive/MyDrive/OODdata/mnist_test.csv')\n",
    "data_test_out = np.array(df_test_out)\n",
    "\n",
    "X_test_out = data_test_out[:, 1:785]/255\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Fashion = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Fashion, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1849,
     "status": "ok",
     "timestamp": 1693777506197,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "G-T51piGWLRU",
    "outputId": "e71862c0-f1c9-43ab-83ef-628e0154ff37"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Fashion = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Fashion:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Fashion = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Fashion.shape)\n",
    "latent_test_out_Fashion = latent_test_out_Fashion.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Fashion.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4261,
     "status": "ok",
     "timestamp": 1693777510455,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "RMYCE2rXYeqT",
    "outputId": "9168e3d7-bdae-4314-e7f4-bb8c61d5e783"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 9 14  3 ... 15  1 14]\n",
      " [20  8 17 ... 15 12 13]\n",
      " [20 14  3 ... 16  6 14]\n",
      " ...\n",
      " [ 9 12  3 ... 15  8  4]\n",
      " [ 9  3 14 ... 13  6  9]\n",
      " [20 14  3 ... 15  8  9]]\n",
      "0.7336609699398798\n",
      "0.005447421200934667\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Fashion = et.apply(latent_test_out_Fashion)\n",
    "\n",
    "print(leaves_test_out_Fashion.shape)\n",
    "print(leaves_test_out_Fashion)\n",
    "\n",
    "distances_test_out_Fashion = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Fashion[i,j] = hamming(leaves_test_out_Fashion[i,:], leaves_test_out_Fashion[j,:])\n",
    "\n",
    "score_test_out_Fashion = sum(distances_test_out_Fashion)/499\n",
    "\n",
    "print(np.mean(score_test_out_Fashion))\n",
    "print(np.cov(score_test_out_Fashion))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3sbvCh5uLVF8"
   },
   "source": [
    "## BreastMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xH7FHZucLXof"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NPSj7LA5LcX1"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/breastmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['train_images'].shape\n",
    "X_test_out = df['train_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 207,
     "status": "ok",
     "timestamp": 1693777510653,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "TlTGVy9BLcZ4",
    "outputId": "089e408e-147b-4703-8b29-a06c65926fa8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "546\n",
      "(546, 4, 7, 7)\n",
      "(546, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['train_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_BreastMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_BreastMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Breast = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Breast.shape)\n",
    "latent_test_out_Breast = latent_test_out_Breast.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Breast.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3833,
     "status": "ok",
     "timestamp": 1693777514482,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "AwnfD_3CLcb7",
    "outputId": "5d6fe0ce-6d9a-4f50-c961-379b5a7d722c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(546, 500)\n",
      "[[10 14  3 ... 15  6  4]\n",
      " [18  8 14 ... 15  8  6]\n",
      " [18  8 14 ... 15  6  9]\n",
      " ...\n",
      " [ 4  8 14 ... 11  8  6]\n",
      " [13 13 10 ... 15  6  4]\n",
      " [10  4 14 ... 11  6  9]]\n",
      "0.5002765050100201\n",
      "0.0042377139675370155\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Breast = et.apply(latent_test_out_Breast)\n",
    "\n",
    "print(leaves_test_out_Breast.shape)\n",
    "print(leaves_test_out_Breast)\n",
    "\n",
    "distances_test_out_Breast = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Breast[i,j] = hamming(leaves_test_out_Breast[i,:], leaves_test_out_Breast[j,:])\n",
    "\n",
    "score_test_out_Breast = sum(distances_test_out_Breast)/499\n",
    "\n",
    "print(np.mean(score_test_out_Breast))\n",
    "print(np.cov(score_test_out_Breast))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bsj9NUx3LXq0"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1vkCzIAMy5Yi"
   },
   "source": [
    "## ChestMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-I7QZNVUhnpI"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3Nh3znXBv1za"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/chestmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4430,
     "status": "ok",
     "timestamp": 1693777519301,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "FFfdi--zv12B",
    "outputId": "27ac748c-6c4a-4476-880b-76043c08aa0c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22433\n",
      "(22433, 4, 7, 7)\n",
      "(22433, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_ChestMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Chest = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Chest.shape)\n",
    "latent_test_out_Chest = latent_test_out_Chest.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Chest.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4340,
     "status": "ok",
     "timestamp": 1693777523615,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "hLfKbVMzzxUi",
    "outputId": "19ca8a02-f0aa-4f45-9a35-e0fc2d0ed80f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(22433, 500)\n",
      "[[ 9 14  3 ... 15  6 14]\n",
      " [10  3 11 ...  6  6  6]\n",
      " [ 9  4 11 ... 15  6  6]\n",
      " ...\n",
      " [ 9  3 11 ... 15  6  6]\n",
      " [ 9  4 11 ... 15  6  6]\n",
      " [10  4 11 ... 15  6  6]]\n",
      "0.35741585571142287\n",
      "0.0020381411440108894\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Chest = et.apply(latent_test_out_Chest)\n",
    "\n",
    "print(leaves_test_out_Chest.shape)\n",
    "print(leaves_test_out_Chest)\n",
    "\n",
    "distances_test_out_Chest = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Chest[i,j] = hamming(leaves_test_out_Chest[i,:], leaves_test_out_Chest[j,:])\n",
    "\n",
    "score_test_out_Chest = sum(distances_test_out_Chest)/499\n",
    "\n",
    "print(np.mean(score_test_out_Chest))\n",
    "print(np.cov(score_test_out_Chest))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Z7xvIZvWMO-t"
   },
   "source": [
    "## OctMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ZXOIXhMFMUEO"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9WyQFN4SMUGT"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/octmnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_ChestMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_ChestMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 161,
     "status": "ok",
     "timestamp": 1693777523755,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "twgmrsCUMUIH",
    "outputId": "e010ecb8-39b7-47cf-b1ca-a0354445c461"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000\n",
      "(1000, 4, 7, 7)\n",
      "(1000, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_OctMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_OctMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Oct = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Oct.shape)\n",
    "latent_test_out_Oct = latent_test_out_Oct.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Oct.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4150,
     "status": "ok",
     "timestamp": 1693777527903,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "JNTwND7cMUKG",
    "outputId": "31591b0f-ffbc-420a-ecb7-e31de370ee67"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1000, 500)\n",
      "[[ 4  8 16 ... 11  8  9]\n",
      " [ 5  8 14 ... 11  8  9]\n",
      " [18  8 16 ...  9  8  9]\n",
      " ...\n",
      " [ 4  8 11 ... 11  8  6]\n",
      " [ 4  8  9 ... 11  8  6]\n",
      " [ 4  6 16 ...  9  8  9]]\n",
      "0.3729050581162325\n",
      "0.0042630307764521345\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Oct = et.apply(latent_test_out_Oct)\n",
    "\n",
    "print(leaves_test_out_Oct.shape)\n",
    "print(leaves_test_out_Oct)\n",
    "\n",
    "distances_test_out_Oct = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Oct[i,j] = hamming(leaves_test_out_Oct[i,:], leaves_test_out_Oct[j,:])\n",
    "\n",
    "score_test_out_Oct = sum(distances_test_out_Oct)/499\n",
    "\n",
    "print(np.mean(score_test_out_Oct))\n",
    "print(np.cov(score_test_out_Oct))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_Tguoeb3Nosv"
   },
   "source": [
    "## PneumMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "yhageUDChpGt"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SJN2w-zVzxWm"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/pneumoniamnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['train_images'].shape\n",
    "X_test_out = df['train_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Pneum = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Pneum, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1046,
     "status": "ok",
     "timestamp": 1693777528925,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "zmGFQYZvNtiL",
    "outputId": "46fa0288-b577-40e4-ccd5-acac74e48575"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4708\n",
      "(4708, 4, 7, 7)\n",
      "(4708, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['train_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Pneum = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Pneum:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Pneum = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Pneum.shape)\n",
    "latent_test_out_Pneum = latent_test_out_Pneum.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Pneum.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3986,
     "status": "ok",
     "timestamp": 1693777532908,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "9SuNE4aSNt7b",
    "outputId": "7d70453a-17a1-4fd7-97af-913e6eed9cee"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4708, 500)\n",
      "[[ 9 14 11 ... 15  1 14]\n",
      " [ 9  4 10 ... 15  6  6]\n",
      " [ 9  4 11 ...  6  6  6]\n",
      " ...\n",
      " [ 9 14 11 ... 15  6  6]\n",
      " [ 9 14  6 ... 15  6  6]\n",
      " [ 9 14  3 ... 15  6  4]]\n",
      "0.25875434068136277\n",
      "0.0038242125133755665\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Pneum = et.apply(latent_test_out_Pneum)\n",
    "\n",
    "print(leaves_test_out_Pneum.shape)\n",
    "print(leaves_test_out_Pneum)\n",
    "\n",
    "distances_test_out_Pneum = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Pneum[i,j] = hamming(leaves_test_out_Pneum[i,:], leaves_test_out_Pneum[j,:])\n",
    "\n",
    "score_test_out_Pneum = sum(distances_test_out_Pneum)/499\n",
    "\n",
    "print(np.mean(score_test_out_Pneum))\n",
    "print(np.cov(score_test_out_Pneum))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "C7Lo5fFxNkeC"
   },
   "source": [
    "## OrganaMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "y1JVWYobNnXs"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cHlm9xYvNnbS"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organamnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3872,
     "status": "ok",
     "timestamp": 1693777537064,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "-0ODe_SyNneE",
    "outputId": "f5353d26-a267-42f6-d3a1-6473eff56c9a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17778\n",
      "(17778, 4, 7, 7)\n",
      "(17778, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organa = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organa:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organa = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organa.shape)\n",
    "latent_test_out_Organa = latent_test_out_Organa.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organa.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4195,
     "status": "ok",
     "timestamp": 1693777541256,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "MOFGPtBYNngB",
    "outputId": "e5a61809-1a76-4f5e-f592-f27fc4e83d27"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(17778, 500)\n",
      "[[10  4  9 ...  6  6  6]\n",
      " [18 13  3 ... 15  6 13]\n",
      " [13 14 10 ...  6  8  6]\n",
      " ...\n",
      " [ 9 14  3 ... 15  6  4]\n",
      " [10 12 10 ...  6  6  6]\n",
      " [ 5  4 14 ... 11  6 13]]\n",
      "0.5954216432865733\n",
      "0.011598263928196146\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organa = et.apply(latent_test_out_Organa)\n",
    "\n",
    "print(leaves_test_out_Organa.shape)\n",
    "print(leaves_test_out_Organa)\n",
    "\n",
    "distances_test_out_Organa = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organa[i,j] = hamming(leaves_test_out_Organa[i,:], leaves_test_out_Organa[j,:])\n",
    "\n",
    "score_test_out_Organa = sum(distances_test_out_Organa)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organa))\n",
    "print(np.cov(score_test_out_Organa))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UEKwndGfCmv6"
   },
   "source": [
    "## OrgancMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "J1GLB1jDh3uE"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6OlXP64YNuYF"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organcmnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1539,
     "status": "ok",
     "timestamp": 1693777542997,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "GneANgXlNuoo",
    "outputId": "81692d67-a96c-4d57-acb8-562a06f6ea03"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8268\n",
      "(8268, 4, 7, 7)\n",
      "(8268, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organc:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organc = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organc.shape)\n",
    "latent_test_out_Organc = latent_test_out_Organc.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organc.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4372,
     "status": "ok",
     "timestamp": 1693777547366,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "poQYWGAOC5gL",
    "outputId": "8763264d-9a65-4851-f78e-289d8c810d27"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8268, 500)\n",
      "[[ 9 14  3 ... 15  6  4]\n",
      " [20  8 10 ... 11  8 13]\n",
      " [13 14  3 ... 15  8  4]\n",
      " ...\n",
      " [10 14 10 ... 15  6  4]\n",
      " [ 9 14  3 ... 15  6  4]\n",
      " [13  4 10 ...  6  6 13]]\n",
      "0.5696672384769539\n",
      "0.012447356490973595\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organc = et.apply(latent_test_out_Organc)\n",
    "\n",
    "print(leaves_test_out_Organc.shape)\n",
    "print(leaves_test_out_Organc)\n",
    "\n",
    "distances_test_out_Organc = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organc[i,j] = hamming(leaves_test_out_Organc[i,:], leaves_test_out_Organc[j,:])\n",
    "\n",
    "score_test_out_Organc = sum(distances_test_out_Organc)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organc))\n",
    "print(np.cov(score_test_out_Organc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "I4J8ADHIOI1l"
   },
   "source": [
    "## OrgansMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Z9oI4sJpOL2y"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Y9Wd57elOMA6"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/organsmnist.npz')\n",
    "#X_test_out = df['train_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Organc = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Organc, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1958,
     "status": "ok",
     "timestamp": 1693777549511,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "r2zIds3qOMDY",
    "outputId": "9c87b764-ab28-4858-bdcb-2cd131674f36"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8829\n",
      "(8829, 4, 7, 7)\n",
      "(8829, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Organs = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Organs:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Organs = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Organs.shape)\n",
    "latent_test_out_Organs = latent_test_out_Organs.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Organs.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4109,
     "status": "ok",
     "timestamp": 1693777553617,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "S--_kE0yOMOV",
    "outputId": "a89dca80-b85f-4fe8-baff-6d2085539eb2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(8829, 500)\n",
      "[[18  4 10 ...  9  8  6]\n",
      " [10  4  6 ...  6  6  6]\n",
      " [10 14 11 ... 15  6  6]\n",
      " ...\n",
      " [10 14  3 ... 15  6 14]\n",
      " [ 9 14  3 ... 15  6  4]\n",
      " [13 14  3 ... 15  6  4]]\n",
      "0.5727371703406814\n",
      "0.011573973577545712\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Organs = et.apply(latent_test_out_Organs)\n",
    "\n",
    "print(leaves_test_out_Organs.shape)\n",
    "print(leaves_test_out_Organs)\n",
    "\n",
    "distances_test_out_Organs = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Organs[i,j] = hamming(leaves_test_out_Organs[i,:], leaves_test_out_Organs[j,:])\n",
    "\n",
    "score_test_out_Organs = sum(distances_test_out_Organs)/499\n",
    "\n",
    "print(np.mean(score_test_out_Organs))\n",
    "print(np.cov(score_test_out_Organs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "PK_cY3XVDxlQ"
   },
   "source": [
    "## TissueMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rXhtk9Mah8mR"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ViCtSF5iC5ie"
   },
   "outputs": [],
   "source": [
    "df = np.load('/content/gdrive/MyDrive/TOOD/datasets/tissuemnist.npz')\n",
    "#X_test_out = df['test_images']/255\n",
    "[size, _, _] = df['test_images'].shape\n",
    "X_test_out = df['test_images'].reshape((size, 784))/255\n",
    "\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "\n",
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_Tissue = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Tissue, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 9414,
     "status": "ok",
     "timestamp": 1693777563956,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "iSIHWqqOC5kT",
    "outputId": "8446b967-efa4-47cd-d96c-b7a2ffa2867e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "47280\n",
      "(47280, 4, 7, 7)\n",
      "(47280, 196)\n"
     ]
    }
   ],
   "source": [
    "[size, _, _] = df['test_images'].shape\n",
    "print(size)\n",
    "\n",
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader_Tissue = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=size, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Tissue:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_Tissue = latent_test_out.detach().numpy()\n",
    "#latent_test_out = latent_test_out[0,:,:]\n",
    "print(latent_test_out_Tissue.shape)\n",
    "latent_test_out_Tissue = latent_test_out_Tissue.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Tissue.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 8882,
     "status": "ok",
     "timestamp": 1693777572834,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "IUQ3nT57Egcv",
    "outputId": "71783170-3f50-442b-c923-7d8a387c1343"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(47280, 500)\n",
      "[[16  8 16 ... 11  8  9]\n",
      " [13  3 16 ...  9  6  9]\n",
      " [16  8 16 ... 11  8  9]\n",
      " ...\n",
      " [13  4 14 ...  9  8  9]\n",
      " [18  8 14 ...  9  8  9]\n",
      " [16  8 16 ...  9  8  9]]\n",
      "0.3627229018036072\n",
      "0.006573450811076395\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Tissue = et.apply(latent_test_out_Tissue)\n",
    "\n",
    "print(leaves_test_out_Tissue.shape)\n",
    "print(leaves_test_out_Tissue)\n",
    "\n",
    "distances_test_out_Tissue = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Tissue[i,j] = hamming(leaves_test_out_Tissue[i,:], leaves_test_out_Tissue[j,:])\n",
    "\n",
    "score_test_out_Tissue = sum(distances_test_out_Tissue)/499\n",
    "\n",
    "print(np.mean(score_test_out_Tissue))\n",
    "print(np.cov(score_test_out_Tissue))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "V9pDbbT2Q9xv"
   },
   "source": [
    "## KMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "e7-UM77Fh-w-"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4BiRKanXxvFv"
   },
   "outputs": [],
   "source": [
    "# test_dataset_KMNIST = torchvision.datasets.KMNIST(root=\"./data\", train=False, transform=transform, download=True)\n",
    "# test_loader_KMNIST = torch.utils.data.DataLoader(test_dataset_KMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_KMNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 8896,
     "status": "ok",
     "timestamp": 1693777581723,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "IrDPmFlTxvH4",
    "outputId": "0d44a4bb-c2bf-4e44-e918-2a63b8fa628c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_KMNIST = torchvision.datasets.KMNIST(root='./data', train=False, download=True, transform=transform)\n",
    "test_loader_KMNIST = torch.utils.data.DataLoader(test_dataset_KMNIST, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_KMNIST:\n",
    "    img, _ = batch\n",
    "    #img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_KMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_KMNIST = latent_test_out_KMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_KMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4647,
     "status": "ok",
     "timestamp": 1693777586347,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "WqFrdnepRkI7",
    "outputId": "7671d884-2747-442e-cecd-ec62e8073d72"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[19 12 14 ...  2  8  9]\n",
      " [11  3 16 ... 12  6 11]\n",
      " [ 9  9  6 ... 12 11 14]\n",
      " ...\n",
      " [11 13 14 ...  6 10  9]\n",
      " [ 5  3 10 ...  9 11  4]\n",
      " [18 12 14 ... 16 12  9]]\n",
      "0.8379014028056112\n",
      "0.0003529398326534493\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_KMNIST = et.apply(latent_test_out_KMNIST)\n",
    "\n",
    "print(leaves_test_out_KMNIST.shape)\n",
    "print(leaves_test_out_KMNIST)\n",
    "\n",
    "distances_test_out_KMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_KMNIST[i,j] = hamming(leaves_test_out_KMNIST[i,:], leaves_test_out_KMNIST[j,:])\n",
    "\n",
    "score_test_out_KMNIST = sum(distances_test_out_KMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_KMNIST))\n",
    "print(np.cov(score_test_out_KMNIST))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "brj6eAE_SJn2"
   },
   "source": [
    "## QMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dIaTRR3kif-b"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1iG0aHi7xvJH"
   },
   "outputs": [],
   "source": [
    "# test_dataset_QMNIST = torchvision.datasets.QMNIST(root=\"./data\", train=False, transform=transform, download=True)\n",
    "# test_loader_QMNIST = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_QMNIST, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 21454,
     "status": "ok",
     "timestamp": 1693777607799,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "gXmIqYEnSMZs",
    "outputId": "4358ae64-ee38-4ba5-a7ec-fb5fcd60a250"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_QMNIST = torchvision.datasets.QMNIST(root='./data', train=False, download=True, transform=transform)\n",
    "test_loader_QMNIST = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_QMNIST:\n",
    "    img, _ = batch\n",
    "    #img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_QMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_QMNIST = latent_test_out_QMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_QMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4381,
     "status": "ok",
     "timestamp": 1693777612168,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "OW51VvQQSMbw",
    "outputId": "e3095173-557f-4524-d699-ba242ded9084"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[ 9 14  3 ... 16  6 13]\n",
      " [ 9  4  3 ... 16 11  4]\n",
      " [20 13 14 ...  2  6 11]\n",
      " ...\n",
      " [16 12 16 ... 11  1  9]\n",
      " [ 4  8 19 ...  8 10 11]\n",
      " [18 13  1 ...  9 12 11]]\n",
      "0.8716412024048095\n",
      "5.010651252746746e-05\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_QMNIST = et.apply(latent_test_out_QMNIST)\n",
    "\n",
    "print(leaves_test_out_QMNIST.shape)\n",
    "print(leaves_test_out_QMNIST)\n",
    "\n",
    "distances_test_out_QMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_QMNIST[i,j] = hamming(leaves_test_out_QMNIST[i,:], leaves_test_out_QMNIST[j,:])\n",
    "\n",
    "score_test_out_QMNIST = sum(distances_test_out_QMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_QMNIST))\n",
    "print(np.cov(score_test_out_QMNIST))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "iKdP-64sRyQ8"
   },
   "source": [
    "## Omniglot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GNAQF4F4R9JQ"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bQdeByYMSFYz"
   },
   "outputs": [],
   "source": [
    "# test_dataset_Omniglot = torchvision.datasets.Omniglot(root='./data', download=True, transform=transform)\n",
    "# test_loader_Omniglot = torch.utils.data.DataLoader(test_dataset_QMNIST, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Omniglot, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 11635,
     "status": "ok",
     "timestamp": 1693777623775,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "rOznxhcjSMd0",
    "outputId": "ffc1b0a4-1270-4cfa-87dc-e4bea845dfcd"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Files already downloaded and verified\n",
      "(19280, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_Omniglot = torchvision.datasets.Omniglot(root='./data', download=True, transform=transform)\n",
    "\n",
    "# Creating Dataloaders from the\n",
    "# training and testing dataset\n",
    "test_loader_Omniglot = torch.utils.data.DataLoader(test_dataset_Omniglot, batch_size=32460, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Omniglot:\n",
    "    img, _ = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Omniglot = latent_test_out.detach().numpy()\n",
    "latent_test_out_Omniglot = latent_test_out_Omniglot.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Omniglot.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4303,
     "status": "ok",
     "timestamp": 1693777628075,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "2l0MVvcaSMfp",
    "outputId": "5ddaadbd-ad61-4138-9567-e8a3beb635d3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(19280, 500)\n",
      "[[ 9 14  6 ... 15  1  4]\n",
      " [ 9 14  3 ... 15  6  9]\n",
      " [ 9 14  3 ... 15  6  4]\n",
      " ...\n",
      " [10 14  3 ... 15  6 14]\n",
      " [ 9 14  3 ... 15  6 14]\n",
      " [10 14  3 ... 15  1 14]]\n",
      "0.22584562725450905\n",
      "0.0013018556112509212\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Omniglot = et.apply(latent_test_out_Omniglot)\n",
    "\n",
    "print(leaves_test_out_Omniglot.shape)\n",
    "print(leaves_test_out_Omniglot)\n",
    "\n",
    "distances_test_out_Omniglot = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Omniglot[i,j] = hamming(leaves_test_out_Omniglot[i,:], leaves_test_out_Omniglot[j,:])\n",
    "\n",
    "score_test_out_Omniglot = sum(distances_test_out_Omniglot)/499\n",
    "\n",
    "print(np.mean(score_test_out_Omniglot))\n",
    "print(np.cov(score_test_out_Omniglot))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "u9OZcvTSXkPh"
   },
   "source": [
    "## Cifar10bw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QfMu7Z3kXol1"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Ej9JmeDhX0NG"
   },
   "outputs": [],
   "source": [
    "# test_dataset_Cifar10bw = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n",
    "# test_loader_Cifar10bw = torch.utils.data.DataLoader(test_dataset_Cifar10bw, batch_size = 64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_Cifar10bw, criterion, optimizer, num_epochs=num_epoch, online=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5012,
     "status": "ok",
     "timestamp": 1693777633066,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "96LUcE1kXon3",
    "outputId": "a033f04d-00c1-47e4-a4a2-3c49d4931794"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Files already downloaded and verified\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.Resize((28, 28)),  # Resize images to match the size of MNIST\n",
    "     transforms.ToTensor()])\n",
    "\n",
    "test_dataset_Cifar10bw = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n",
    "\n",
    "# Creating Dataloaders from the\n",
    "# training and testing dataset\n",
    "test_loader_Cifar10bw = torch.utils.data.DataLoader(test_dataset_Cifar10bw, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_Cifar10bw:\n",
    "    img, _ = batch\n",
    "    img = torchvision.transforms.Grayscale(num_output_channels=1)(img)\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Cifar10bw = latent_test_out.detach().numpy()\n",
    "latent_test_out_Cifar10bw = latent_test_out_Cifar10bw.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Cifar10bw.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4494,
     "status": "ok",
     "timestamp": 1693777637556,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "-9ekzdVoYJIH",
    "outputId": "9254529d-34ce-49f0-82e0-195bf9f519d1"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[13 14  3 ... 15  6  4]\n",
      " [13  4 14 ... 15  6  6]\n",
      " [10 14  3 ... 15  6  4]\n",
      " ...\n",
      " [10 14 10 ... 15  6  4]\n",
      " [10 13 11 ...  9  6  6]\n",
      " [18 14  3 ... 15  6  4]]\n",
      "0.5294821803607215\n",
      "0.0037189329746369383\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Cifar10bw = et.apply(latent_test_out_Cifar10bw)\n",
    "\n",
    "print(leaves_test_out_Cifar10bw.shape)\n",
    "print(leaves_test_out_Cifar10bw)\n",
    "\n",
    "distances_test_out_Cifar10bw = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Cifar10bw[i,j] = hamming(leaves_test_out_Cifar10bw[i,:], leaves_test_out_Cifar10bw[j,:])\n",
    "\n",
    "score_test_out_Cifar10bw = sum(distances_test_out_Cifar10bw)/499\n",
    "\n",
    "print(np.mean(score_test_out_Cifar10bw))\n",
    "print(np.cov(score_test_out_Cifar10bw))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WDCNJ2thdelj"
   },
   "source": [
    "## NotMNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hhMEPlqad6cl"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1308,
     "status": "ok",
     "timestamp": 1693777638861,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "7HUmXsizdh0T",
    "outputId": "061917df-3b85-4af3-aee9-875bcc912ba8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(500, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "from pims import ImageSequence\n",
    "from PIL import Image\n",
    "\n",
    "images_A = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/A/*.png')\n",
    "images_B = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/B/*.png')\n",
    "images_C = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/C/*.png')\n",
    "images_D = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/D/*.png')\n",
    "images_E = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/E/*.png')\n",
    "images_F = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/F/*.png')\n",
    "images_G = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/G/*.png')\n",
    "images_H = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/H/*.png')\n",
    "images_I = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/I/*.png')\n",
    "images_J = ImageSequence('/content/gdrive/MyDrive/TOOD/datasets/NotMNIST_small/J/*.png')\n",
    "\n",
    "X_data_A = np.zeros((50, 28**2))\n",
    "X_data_B = np.zeros((50, 28**2))\n",
    "X_data_C = np.zeros((50, 28**2))\n",
    "X_data_D = np.zeros((50, 28**2))\n",
    "X_data_E = np.zeros((50, 28**2))\n",
    "X_data_F = np.zeros((50, 28**2))\n",
    "X_data_G = np.zeros((50, 28**2))\n",
    "X_data_H = np.zeros((50, 28**2))\n",
    "X_data_I = np.zeros((50, 28**2))\n",
    "X_data_J = np.zeros((50, 28**2))\n",
    "\n",
    "for i in range(50):\n",
    "    X_data_A[i,:] = np.reshape(np.array(images_A[i]), (1,28**2))/255\n",
    "    X_data_B[i,:] = np.reshape(np.array(images_B[i]), (1,28**2))/255\n",
    "    X_data_C[i,:] = np.reshape(np.array(images_C[i]), (1,28**2))/255\n",
    "    X_data_D[i,:] = np.reshape(np.array(images_D[i]), (1,28**2))/255\n",
    "    X_data_E[i,:] = np.reshape(np.array(images_E[i]), (1,28**2))/255\n",
    "    X_data_F[i,:] = np.reshape(np.array(images_F[i]), (1,28**2))/255\n",
    "    X_data_G[i,:] = np.reshape(np.array(images_G[i]), (1,28**2))/255\n",
    "    X_data_H[i,:] = np.reshape(np.array(images_H[i]), (1,28**2))/255\n",
    "    X_data_I[i,:] = np.reshape(np.array(images_I[i]), (1,28**2))/255\n",
    "    X_data_J[i,:] = np.reshape(np.array(images_J[i]), (1,28**2))/255\n",
    "\n",
    "\n",
    "X_test_out = np.vstack([X_data_A, X_data_B, X_data_C, X_data_D, X_data_E, X_data_F, X_data_G, X_data_H, X_data_I, X_data_J])\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QtoumAYBdh4P"
   },
   "outputs": [],
   "source": [
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# test_loader_NotMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, test_loader_NotMNIST, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 9802,
     "status": "ok",
     "timestamp": 1693777648661,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "vqZPvS-3dh7k",
    "outputId": "12762721-9794-4a58-fb06-96e68593428b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7280, 196)\n"
     ]
    }
   ],
   "source": [
    "test_loader_NotMNIST = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000, shuffle=True)\n",
    "\n",
    "for batch in test_loader_NotMNIST:\n",
    "\n",
    "    img = batch\n",
    "    #img = img.reshape(-1,1,28,28)\n",
    "\n",
    "    # Generating output\n",
    "    out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "latent_test_out_NotMNIST = latent_test_out.detach().numpy()\n",
    "latent_test_out_NotMNIST = latent_test_out_NotMNIST.reshape(-1,4*7*7)\n",
    "print(latent_test_out_NotMNIST.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4114,
     "status": "ok",
     "timestamp": 1693777652750,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "34usMYYqdh8u",
    "outputId": "482a6246-a6f5-4a60-aba7-a0d8a5d5d31a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7280, 500)\n",
      "[[ 5  8 16 ... 11  8  9]\n",
      " [ 5  8 14 ...  9  8  9]\n",
      " [16  8 16 ... 11  8  9]\n",
      " ...\n",
      " [16  6 16 ...  9  6  9]\n",
      " [16  6 16 ... 11  8  9]\n",
      " [16  8 16 ... 11  8  9]]\n",
      "0.35013604809619236\n",
      "0.006879250267215913\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_NotMNIST = et.apply(latent_test_out_NotMNIST)\n",
    "\n",
    "print(leaves_test_out_NotMNIST.shape)\n",
    "print(leaves_test_out_NotMNIST)\n",
    "\n",
    "distances_test_out_NotMNIST = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_NotMNIST[i,j] = hamming(leaves_test_out_NotMNIST[i,:], leaves_test_out_NotMNIST[j,:])\n",
    "\n",
    "score_test_out_NotMNIST = sum(distances_test_out_NotMNIST)/499\n",
    "\n",
    "print(np.mean(score_test_out_NotMNIST))\n",
    "print(np.cov(score_test_out_NotMNIST))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SniWCeDXdh99"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "tamsyBx9xv2t"
   },
   "source": [
    "## Gaussian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "efiYSL3HjJa5"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 923,
     "status": "ok",
     "timestamp": 1693777653652,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "qAcmKgItEge1",
    "outputId": "36857196-6905-48d6-b228-fc2ab67b8678"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 784)\n",
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "# Gaussian random noise\n",
    "mean_test_out = np.random.uniform(low=0,high=1,size=784)\n",
    "cov_test_out = np.random.rand(784,784)\n",
    "cov_test_out = np.matmul(cov_test_out, cov_test_out.T)\n",
    "X_test_out = np.random.multivariate_normal(mean_test_out, cov_test_out, 10000)\n",
    "X_test_out = MinMaxScaler().fit_transform(X_test_out)\n",
    "print(X_test_out.shape)\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "i1d4sx8MitCF"
   },
   "outputs": [],
   "source": [
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# train_loader_Gaussian = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, train_loader_Gaussian, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 787,
     "status": "ok",
     "timestamp": 1693777654437,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "QxSu2wMBxyYr",
    "outputId": "17864d79-59c4-431a-a76d-791154860f84"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000)\n",
    "\n",
    "for batch in test_loader:\n",
    "    img = batch\n",
    "    img = img.reshape(-1,1,28,28)\n",
    "    # Generating output\n",
    "    #out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Gaussian = latent_test_out.detach().numpy()\n",
    "print(latent_test_out_Gaussian.shape)\n",
    "latent_test_out_Gaussian = latent_test_out_Gaussian.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Gaussian.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4101,
     "status": "ok",
     "timestamp": 1693777658535,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "l24-4OTyxyaj",
    "outputId": "fb9830fe-9db8-4e9b-f092-45075b3394b3"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[10 14 11 ... 15  6  4]\n",
      " [10 14  6 ... 15  6  4]\n",
      " [10 14 11 ...  6  6  6]\n",
      " ...\n",
      " [10  4 10 ...  6  6  6]\n",
      " [13  4 14 ...  9  6  6]\n",
      " [10  4 10 ...  9  6  6]]\n",
      "0.33499496593186373\n",
      "0.007812088395399784\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Gaussian = et.apply(latent_test_out_Gaussian)\n",
    "\n",
    "print(leaves_test_out_Gaussian.shape)\n",
    "print(leaves_test_out_Gaussian)\n",
    "\n",
    "distances_test_out_Gaussian = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Gaussian[i,j] = hamming(leaves_test_out_Gaussian[i,:], leaves_test_out_Gaussian[j,:])\n",
    "\n",
    "score_test_out_Gaussian = sum(distances_test_out_Gaussian)/499\n",
    "\n",
    "print(np.mean(score_test_out_Gaussian))\n",
    "print(np.cov(score_test_out_Gaussian))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4Zs0-6kFybMM"
   },
   "source": [
    "## Uniform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BHdD_LxEjNmI"
   },
   "outputs": [],
   "source": [
    "# model_Conv_AE_OOD_MNIST_train_30 = 'classifier.pt'\n",
    "# path = \"/content/gdrive/My Drive/{model_Conv_AE_OOD_MNIST_train_30}\"\n",
    "# model.load_state_dict(torch.load(path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 233,
     "status": "ok",
     "timestamp": 1693777658747,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "6b5z_lW8xycj",
    "outputId": "84463dbd-1e62-4ce6-b44d-99eedccc87e0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 784)\n",
      "(10000, 1, 28, 28)\n"
     ]
    }
   ],
   "source": [
    "# Uniform random noise\n",
    "x_min = [0]*784\n",
    "x_max = [1]*784\n",
    "X_test_out = np.random.uniform(low=x_min, high=x_max, size=(10000,784))\n",
    "X_test_out = MinMaxScaler().fit_transform(X_test_out)\n",
    "print(X_test_out.shape)\n",
    "X_test_out = X_test_out.reshape((-1,1,28,28))\n",
    "print(X_test_out.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "X6MhxpC2jQU2"
   },
   "outputs": [],
   "source": [
    "# X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "# train_loader_Uniform = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=64, shuffle=True)\n",
    "\n",
    "# # model = ConvAutoencoder()\n",
    "# # criterion = nn.MSELoss()\n",
    "# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# train(model, train_loader_Uniform, criterion, optimizer, num_epochs=num_epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 719,
     "status": "ok",
     "timestamp": 1693777659464,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "BG0fLErIxyeZ",
    "outputId": "98c89adc-ab42-4335-eb08-50909c8dd680"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 4, 7, 7)\n",
      "(10000, 196)\n"
     ]
    }
   ],
   "source": [
    "X_test_out_tensor = torch.tensor(X_test_out, dtype=torch.float32)\n",
    "test_loader = torch.utils.data.DataLoader(X_test_out_tensor, batch_size=10000)\n",
    "\n",
    "for batch in test_loader:\n",
    "    img = batch\n",
    "    img = img.reshape(-1,1,28,28)\n",
    "    # Generating output\n",
    "    #out = model.forward(img)\n",
    "    latent_test_out = model.forward_encoder(img)\n",
    "\n",
    "\n",
    "latent_test_out_Uniform = latent_test_out.detach().numpy()\n",
    "print(latent_test_out_Uniform.shape)\n",
    "latent_test_out_Uniform = latent_test_out_Uniform.reshape(-1,4*7*7)\n",
    "print(latent_test_out_Uniform.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4629,
     "status": "ok",
     "timestamp": 1693777664089,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "FvwHsDP5yif8",
    "outputId": "f941bf64-a347-45f1-8600-d3d89c24ef2c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 500)\n",
      "[[10  4  6 ...  6  6  4]\n",
      " [10  4  3 ...  6  6  6]\n",
      " [ 9 14  6 ... 15  6  6]\n",
      " ...\n",
      " [10  4 10 ...  2  6  4]\n",
      " [10 14  3 ... 15  6  4]\n",
      " [10  4 11 ...  6  6  4]]\n",
      "0.3288791182364733\n",
      "0.00028459607539060676\n"
     ]
    }
   ],
   "source": [
    "leaves_test_out_Uniform = et.apply(latent_test_out_Uniform)\n",
    "\n",
    "print(leaves_test_out_Uniform.shape)\n",
    "print(leaves_test_out_Uniform)\n",
    "\n",
    "distances_test_out_Uniform = np.zeros((500,500))\n",
    "\n",
    "for i in range(500):\n",
    "    for j in range(500):\n",
    "        distances_test_out_Uniform[i,j] = hamming(leaves_test_out_Uniform[i,:], leaves_test_out_Uniform[j,:])\n",
    "\n",
    "score_test_out_Uniform = sum(distances_test_out_Uniform)/499\n",
    "\n",
    "print(np.mean(score_test_out_Uniform))\n",
    "print(np.cov(score_test_out_Uniform))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "N_CKgEZ1yiju"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nxm_k1Noyil7"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "FZGej4gIyhMD"
   },
   "source": [
    "# Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 17,
     "status": "ok",
     "timestamp": 1693777664487,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "xMKKU1PywASB",
    "outputId": "af631800-4801-4dce-d16e-5a9a4c989e42"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.981246\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "0.999816\n",
      "0.987344\n",
      "0.992648\n",
      "1.0\n",
      "1.0\n",
      "0.960008\n",
      "0.47357400000000005\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "score_pred_Breast = np.concatenate([score_test_in, score_test_out_Breast])\n",
    "score_pred_Chest = np.concatenate([score_test_in, score_test_out_Chest])\n",
    "score_pred_Oct = np.concatenate([score_test_in, score_test_out_Oct])\n",
    "score_pred_Organa = np.concatenate([score_test_in, score_test_out_Organa])\n",
    "score_pred_Organc = np.concatenate([score_test_in, score_test_out_Organc])\n",
    "score_pred_Organs = np.concatenate([score_test_in, score_test_out_Organs])\n",
    "score_pred_Pneum = np.concatenate([score_test_in, score_test_out_Pneum])\n",
    "score_pred_Tissue = np.concatenate([score_test_in, score_test_out_Tissue])\n",
    "score_pred_KMNIST = np.concatenate([score_test_in, score_test_out_KMNIST])\n",
    "score_pred_QMNIST = np.concatenate([score_test_in, score_test_out_QMNIST])\n",
    "score_pred_Omniglot = np.concatenate([score_test_in, score_test_out_Omniglot])\n",
    "score_pred_Cifar10bw = np.concatenate([score_test_in, score_test_out_Cifar10bw])\n",
    "score_pred_NotMNIST = np.concatenate([score_test_in, score_test_out_NotMNIST])\n",
    "score_pred_Gaussian = np.concatenate([score_test_in, score_test_out_Gaussian])\n",
    "score_pred_Uniform = np.concatenate([score_test_in, score_test_out_Uniform])\n",
    "score_true = np.concatenate([np.ones(500), np.zeros(500)])\n",
    "\n",
    "print(roc_auc_score(score_true, score_pred_Breast))\n",
    "print(roc_auc_score(score_true, score_pred_Chest))\n",
    "print(roc_auc_score(score_true, score_pred_Oct))\n",
    "print(roc_auc_score(score_true, score_pred_Organa))\n",
    "print(roc_auc_score(score_true, score_pred_Organc))\n",
    "print(roc_auc_score(score_true, score_pred_Organs))\n",
    "print(roc_auc_score(score_true, score_pred_Pneum))\n",
    "print(roc_auc_score(score_true, score_pred_Tissue))\n",
    "print(roc_auc_score(score_true, score_pred_KMNIST))\n",
    "print(roc_auc_score(score_true, score_pred_QMNIST))\n",
    "print(roc_auc_score(score_true, score_pred_Omniglot))\n",
    "print(roc_auc_score(score_true, score_pred_Cifar10bw))\n",
    "print(roc_auc_score(score_true, score_pred_NotMNIST))\n",
    "print(roc_auc_score(score_true, score_pred_Gaussian))\n",
    "print(roc_auc_score(score_true, score_pred_Uniform))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 13,
     "status": "ok",
     "timestamp": 1693777664488,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "77iJeZtfwYyj",
    "outputId": "45a6ca1c-6e85-4fa1-c7c8-1bcb1a46804f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9587945370388583\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "0.9998073824946845\n",
      "0.9601881645523316\n",
      "0.978177780986046\n",
      "1.0\n",
      "1.0\n",
      "0.952141789681812\n",
      "0.5174666908760122\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "precision_Breast, recall_Breast, thresholds_Breast = precision_recall_curve(score_true, score_pred_Breast)\n",
    "precision_Chest, recall_Chest, thresholds_Chest = precision_recall_curve(score_true, score_pred_Chest)\n",
    "precision_Oct, recall_Oct, thresholds_Oct = precision_recall_curve(score_true, score_pred_Oct)\n",
    "precision_Organa, recall_Organa, thresholds_Organa = precision_recall_curve(score_true, score_pred_Organa)\n",
    "precision_Organc, recall_Organc, thresholds_Organc = precision_recall_curve(score_true, score_pred_Organc)\n",
    "precision_Organs, recall_Organs, thresholds_Organs = precision_recall_curve(score_true, score_pred_Organs)\n",
    "precision_Pneum, recall_Pneum, thresholds_Pneum = precision_recall_curve(score_true, score_pred_Pneum)\n",
    "precision_Tissue, recall_Tissue, thresholds_Tissue = precision_recall_curve(score_true, score_pred_Tissue)\n",
    "precision_KMNIST, recall_KMNIST, thresholds_KMNIST = precision_recall_curve(score_true, score_pred_KMNIST)\n",
    "precision_QMNIST, recall_QMNIST, thresholds_QMNIST = precision_recall_curve(score_true, score_pred_QMNIST)\n",
    "precision_Omniglot, recall_Omniglot, thresholds_Omniglot = precision_recall_curve(score_true, score_pred_Omniglot)\n",
    "precision_Cifar10bw, recall_Cifar10bw, threshold_Cifar10bw = precision_recall_curve(score_true, score_pred_Cifar10bw)\n",
    "precision_NotMNIST, recall_NotMNIST, thresholds_NotMNIST = precision_recall_curve(score_true, score_pred_NotMNIST)\n",
    "precision_Gaussian, recall_Gaussian, thresholds_Gaussian = precision_recall_curve(score_true, score_pred_Gaussian)\n",
    "precision_Uniform, recall_Uniform, thresholds_Uniform = precision_recall_curve(score_true, score_pred_Uniform)\n",
    "\n",
    "auc_precision_recall_Breast = auc(recall_Breast, precision_Breast)\n",
    "auc_precision_recall_Chest = auc(recall_Chest, precision_Chest)\n",
    "auc_precision_recall_Oct = auc(recall_Oct, precision_Oct)\n",
    "auc_precision_recall_Organa = auc(recall_Organa, precision_Organa)\n",
    "auc_precision_recall_Organc = auc(recall_Organc, precision_Organc)\n",
    "auc_precision_recall_Organs = auc(recall_Organs, precision_Organs)\n",
    "auc_precision_recall_Pneum = auc(recall_Pneum, precision_Pneum)\n",
    "auc_precision_recall_Tissue = auc(recall_Tissue, precision_Tissue)\n",
    "auc_precision_recall_KMNIST = auc(recall_KMNIST, precision_KMNIST)\n",
    "auc_precision_recall_QMNIST = auc(recall_QMNIST, precision_QMNIST)\n",
    "auc_precision_recall_Omniglot = auc(recall_Omniglot, precision_Omniglot)\n",
    "auc_precision_recall_Cifar10bw = auc(recall_Cifar10bw, precision_Cifar10bw)\n",
    "auc_precision_recall_NotMNIST = auc(recall_NotMNIST, precision_NotMNIST)\n",
    "auc_precision_recall_Gaussian = auc(recall_Gaussian, precision_Gaussian)\n",
    "auc_precision_recall_Uniform = auc(recall_Uniform, precision_Uniform)\n",
    "\n",
    "print(auc_precision_recall_Breast)\n",
    "print(auc_precision_recall_Chest)\n",
    "print(auc_precision_recall_Oct)\n",
    "print(auc_precision_recall_Organa)\n",
    "print(auc_precision_recall_Organc)\n",
    "print(auc_precision_recall_Organs)\n",
    "print(auc_precision_recall_Pneum)\n",
    "print(auc_precision_recall_Tissue)\n",
    "print(auc_precision_recall_KMNIST)\n",
    "print(auc_precision_recall_QMNIST)\n",
    "print(auc_precision_recall_Omniglot)\n",
    "print(auc_precision_recall_Cifar10bw)\n",
    "print(auc_precision_recall_NotMNIST)\n",
    "print(auc_precision_recall_Gaussian)\n",
    "print(auc_precision_recall_Uniform)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 165,
     "status": "ok",
     "timestamp": 1693777664642,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "NESkWnxQR1-0",
    "outputId": "3d5b9e12-58b9-41ff-caf1-9dce445b13dd"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.036\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.018\n",
      "0.012\n",
      "0.0\n",
      "0.0\n",
      "0.138\n",
      "0.976\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n",
      "0.0\n"
     ]
    }
   ],
   "source": [
    "def compute_fpr95(y_true, y_pred_probs):\n",
    "    fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_true, y_pred_probs)\n",
    "    idx = np.abs(tpr - 0.95).argmin()\n",
    "    fpr95 = fpr[idx]\n",
    "    return fpr95\n",
    "\n",
    "# Example usage\n",
    "# Assuming you have y_true (true labels) and y_pred_probs (predicted probabilities)\n",
    "fpr95_score_Breast = compute_fpr95(score_true, score_pred_Breast)\n",
    "fpr95_score_Chest = compute_fpr95(score_true, score_pred_Chest)\n",
    "fpr95_score_Oct = compute_fpr95(score_true, score_pred_Oct)\n",
    "fpr95_score_Organa = compute_fpr95(score_true, score_pred_Organa)\n",
    "fpr95_score_Organc = compute_fpr95(score_true, score_pred_Organc)\n",
    "fpr95_score_Organs = compute_fpr95(score_true, score_pred_Organs)\n",
    "fpr95_score_Pneum = compute_fpr95(score_true, score_pred_Pneum)\n",
    "fpr95_score_Tissue = compute_fpr95(score_true, score_pred_Tissue)\n",
    "fpr95_score_KMNIST = compute_fpr95(score_true, score_pred_KMNIST)\n",
    "fpr95_score_QMNIST = compute_fpr95(score_true, score_pred_QMNIST)\n",
    "fpr95_score_Omniglot = compute_fpr95(score_true, score_pred_Omniglot)\n",
    "fpr95_score_NotMNIST = compute_fpr95(score_true, score_pred_NotMNIST)\n",
    "fpr95_score_Cifar10bw = compute_fpr95(score_true, score_pred_Cifar10bw)\n",
    "fpr95_score_Gaussian = compute_fpr95(score_true, score_pred_Gaussian)\n",
    "fpr95_score_Uniform = compute_fpr95(score_true, score_pred_Uniform)\n",
    "\n",
    "print(fpr95_score_Breast)\n",
    "print(fpr95_score_Chest)\n",
    "print(fpr95_score_Oct)\n",
    "print(fpr95_score_Organa)\n",
    "print(fpr95_score_Organc)\n",
    "print(fpr95_score_Organs)\n",
    "print(fpr95_score_Pneum)\n",
    "print(fpr95_score_Tissue)\n",
    "print(fpr95_score_KMNIST)\n",
    "print(fpr95_score_QMNIST)\n",
    "print(fpr95_score_Omniglot)\n",
    "print(fpr95_score_NotMNIST)\n",
    "print(fpr95_score_Cifar10bw)\n",
    "print(fpr95_score_Uniform)\n",
    "print(fpr95_score_Gaussian)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6OgOveH0R2BF"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 11,
     "status": "ok",
     "timestamp": 1693777664642,
     "user": {
      "displayName": "Zhaiming Shen",
      "userId": "12760861740580065439"
     },
     "user_tz": 240
    },
    "id": "ne3WwJ3iDJmu",
    "outputId": "87e86554-554b-4a8f-ad1e-c0a2a7c41e94"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.871146869739479 6.698297940476351e-05\n",
      "0.7336609699398798 0.005447421200934667\n",
      "0.5002765050100201 0.0042377139675370155\n",
      "0.35741585571142287 0.0020381411440108894\n",
      "0.3729050581162325 0.0042630307764521345\n",
      "0.5954216432865733 0.011598263928196146\n",
      "0.5696672384769539 0.012447356490973595\n",
      "0.5727371703406814 0.011573973577545712\n",
      "0.25875434068136277 0.0038242125133755665\n",
      "0.3627229018036072 0.006573450811076395\n",
      "0.8379014028056112 0.0003529398326534493\n",
      "0.8716412024048095 5.010651252746746e-05\n",
      "0.22584562725450905 0.0013018556112509212\n",
      "0.5294821803607215 0.0037189329746369383\n",
      "0.35013604809619236 0.006879250267215913\n",
      "0.33499496593186373 0.007812088395399784\n",
      "0.3288791182364733 0.00028459607539060676\n"
     ]
    }
   ],
   "source": [
    "print(np.mean(score_test_in), np.cov(score_test_in))\n",
    "print(np.mean(score_test_out_Breast), np.cov(score_test_out_Breast))\n",
    "print(np.mean(score_test_out_Chest), np.cov(score_test_out_Chest))\n",
    "print(np.mean(score_test_out_Oct), np.cov(score_test_out_Oct))\n",
    "print(np.mean(score_test_out_Organa), np.cov(score_test_out_Organa))\n",
    "print(np.mean(score_test_out_Organc), np.cov(score_test_out_Organc))\n",
    "print(np.mean(score_test_out_Organs), np.cov(score_test_out_Organs))\n",
    "print(np.mean(score_test_out_Pneum), np.cov(score_test_out_Pneum))\n",
    "print(np.mean(score_test_out_Tissue), np.cov(score_test_out_Tissue))\n",
    "print(np.mean(score_test_out_KMNIST), np.cov(score_test_out_KMNIST))\n",
    "print(np.mean(score_test_out_QMNIST), np.cov(score_test_out_QMNIST))\n",
    "print(np.mean(score_test_out_Omniglot), np.cov(score_test_out_Omniglot))\n",
    "print(np.mean(score_test_out_Cifar10bw), np.cov(score_test_out_Cifar10bw))\n",
    "print(np.mean(score_test_out_NotMNIST), np.cov(score_test_out_NotMNIST))\n",
    "print(np.mean(score_test_out_Gaussian), np.cov(score_test_out_Gaussian))\n",
    "print(np.mean(score_test_out_Uniform), np.cov(score_test_out_Uniform))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WhIA-nVAfFCu"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Ix-DmCBdrucT"
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CgBZPC9Truem"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyNoFVV9NW/ilslEOrdUXH2M",
   "machine_shape": "hm",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
