{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a6438c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import umap\n",
    "from sklearn.manifold import TSNE\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from matplotlib.cm import hsv, twilight_shifted\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from collections import defaultdict\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1078eb68",
   "metadata": {},
   "source": [
    "# Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b74bdc97",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset to process and the tag used in the names of the saved embeddings.\n",
    "dataset_name = 'MNIST'\n",
    "version = 'd1'\n",
    "\n",
    "# COIL datasets are read from data.npy / labels.npy; every other dataset\n",
    "# is read from the train_-prefixed files.\n",
    "is_coil = dataset_name in ('COIL-20', 'COIL-100')\n",
    "if is_coil:\n",
    "    data_path = f'data/{dataset_name}/prepared/data.npy'\n",
    "    labels_path = f'data/{dataset_name}/prepared/labels.npy'\n",
    "else:\n",
    "    data_path = f'data/{dataset_name}/prepared/train_data.npy'\n",
    "    labels_path = f'data/{dataset_name}/prepared/train_labels.npy'\n",
    "\n",
    "data = np.load(data_path)\n",
    "try:\n",
    "    labels = np.load(labels_path)\n",
    "except FileNotFoundError:\n",
    "    # No label file on disk: fall back to an all-ones label vector.\n",
    "    labels = np.ones(data.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7702a92d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collapse all trailing axes so each sample becomes one flat feature vector.\n",
    "if data.ndim > 2:\n",
    "    data = data.reshape(len(data), -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34b4821c-ec0d-487a-baf1-57d927f5ad1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the array shape after the optional flattening step.\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4ea6920-e476-4b38-93d0-2b78d0016d77",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of output dimensions requested from both PCA and UMAP.\n",
    "latent_dim = 16"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4fdc8f2a-971f-4099-bb63-0d1becb137f4",
   "metadata": {},
   "source": [
    "# PCA and UMAP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ac5357d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast to single-precision floats before scaling and fitting the embeddings.\n",
    "data = data.astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e68b0d79",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.datasets import fetch_openml\n",
    "\n",
    "# Scale raw pixel intensities from [0, 255] down to [0, 1].\n",
    "# The guard keeps this cell idempotent: an unconditional in-place\n",
    "# `data /= 255.0` would shrink the values again on every re-run.\n",
    "# NOTE(review): assumes unscaled inputs have a maximum above 1 - confirm\n",
    "# for datasets other than MNIST.\n",
    "if data.max() > 1.0:\n",
    "    data = data / 255.0\n",
    "\n",
    "# Fit PCA with `latent_dim` components. The seed is fixed because\n",
    "# svd_solver='auto' may select the randomized solver, whose output\n",
    "# otherwise varies between runs; 42 matches the seed used for UMAP.\n",
    "n_components = latent_dim\n",
    "pca = PCA(n_components=n_components, random_state=42)\n",
    "X_pca = pca.fit_transform(data)\n",
    "\n",
    "# X_pca now holds the data projected onto `latent_dim` principal components."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2ca8e86-5d43-4f77-bed9-48fa59dfdf83",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expected: (number of samples, latent_dim).\n",
    "X_pca.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8d406dd-4011-4e37-9a9b-334081308d07",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the PCA embedding under the dataset folder; `version` tags the file name.\n",
    "np.save(f'data/{dataset_name}/PCA_latent_output_{version}.npy', X_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bad2955-d91f-454e-bdbd-c41d4c6ec361",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Embed `data` with UMAP into `latent_dim` dimensions, using a fixed random_state.\n",
    "n_components_umap = latent_dim\n",
    "umap_model = umap.UMAP(n_components=n_components_umap, random_state=42)\n",
    "X_umap = umap_model.fit_transform(data)\n",
    "\n",
    "# X_umap now holds the UMAP embedding of the data with `latent_dim` dimensions (no t-SNE embedding is computed in this notebook)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe8519af-6a88-4c6e-a75d-a9dc80805803",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expected: (number of samples, latent_dim).\n",
    "X_umap.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbf8742e-259c-4397-9e03-eae9daeee4e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the UMAP embedding under the dataset folder; `version` tags the file name.\n",
    "np.save(f'data/{dataset_name}/UMAP_latent_output_{version}.npy', X_umap)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8869b0fa-63c9-40c5-ac51-612518d7eb4b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
