{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%env CUDA_VISIBLE_DEVICES=1\n",
    "import matplotlib.pyplot as plt\n",
    "from utils.utils import get_path\n",
    "from utils.io_utils import load_multiple_res\n",
    "from utils.pd_utils import get_outlier_scores_best_auc, filter_dgms\n",
    "import os\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e652a9d1ebacec97"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply the matplotlib style referenced by `style_file` to all figures created afterwards.\n",
    "style_file = \"utils.style\"\n",
    "plt.style.use(style_file)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6675f28ac7880332"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# root_path is whatever get_path(\"data\") returns; fig_path is its \"figures\" subfolder.\n",
    "# Both are handed to the loading / plotting calls further down.\n",
    "root_path = get_path(\"data\")\n",
    "fig_path = os.path.join(root_path, \"figures\")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "93f62199eec3e53d"
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Figs with many methods on toy data"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "ec0bf3966b05c30b"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "distances = {\n",
    "    \"euclidean\": [{}],\n",
    "    \"fermat\": [\n",
    "               {\"p\": 2},\n",
    "               {\"p\": 3},\n",
    "               {\"p\": 5},\n",
    "               {\"p\": 7}\n",
    "               ],\n",
    "    \"dtm\" : [\n",
    "            {\"k\": 4, \"p_dtm\": 2, \"p_radius\": 1},\n",
    "            {\"k\": 4, \"p_dtm\": np.inf, \"p_radius\": 1},\n",
    "            {\"k\": 15, \"p_dtm\": 2, \"p_radius\": 1},\n",
    "            {\"k\": 15, \"p_dtm\": np.inf, \"p_radius\": 1},\n",
    "            {\"k\": 100, \"p_dtm\": 2, \"p_radius\": 1},\n",
    "            {\"k\": 100, \"p_dtm\": np.inf, \"p_radius\": 1},\n",
    "\n",
    "            {\"k\": 4, \"p_dtm\": 2, \"p_radius\": 2},\n",
    "            {\"k\": 4, \"p_dtm\": np.inf, \"p_radius\": 2},\n",
    "            {\"k\": 15, \"p_dtm\": 2, \"p_radius\": 2},\n",
    "            {\"k\": 15, \"p_dtm\": np.inf, \"p_radius\": 2},\n",
    "            {\"k\": 100, \"p_dtm\": 2, \"p_radius\": 2}, \n",
    "            {\"k\": 100, \"p_dtm\": np.inf, \"p_radius\": 2},\n",
    "\n",
    "            {\"k\": 4, \"p_dtm\": 2, \"p_radius\": np.inf},\n",
    "            {\"k\": 4, \"p_dtm\": np.inf, \"p_radius\": np.inf}, \n",
    "            {\"k\": 15, \"p_dtm\": 2, \"p_radius\": np.inf},\n",
    "            {\"k\": 15, \"p_dtm\": np.inf, \"p_radius\": np.inf},\n",
    "            {\"k\": 100, \"p_dtm\": 2, \"p_radius\": np.inf},\n",
    "            {\"k\": 100, \"p_dtm\": np.inf, \"p_radius\": np.inf},\n",
    "    ],\n",
    "    \"core\": [\n",
    "        {\"k\": 15},\n",
    "        {\"k\": 100}\n",
    "    ],\n",
    "    \"sknn_dist\": [\n",
    "        {\"k\": 15},\n",
    "        {\"k\": 100}\n",
    "    ],\n",
    "    \"tsne\": [\n",
    "         {\"perplexity\": 30},\n",
    "         {\"perplexity\": 200},\n",
    "         {\"perplexity\": 333}\n",
    "    ],\n",
    "    \"umap\": [\n",
    "         {\"k\": 100, \"use_rho\": True, \"include_self\": True},\n",
    "         {\"k\": 999, \"use_rho\": True, \"include_self\": True},\n",
    "    ],\n",
    "    \"tsne_embd\": [\n",
    "        {\"perplexity\": 8, \"n_epochs\": 500, \"n_early_epochs\": 250, \"rescale_tsne\": True},\n",
    "        {\"perplexity\": 30, \"n_epochs\": 500, \"n_early_epochs\": 250, \"rescale_tsne\": True},\n",
    "        {\"perplexity\": 333, \"n_epochs\": 500, \"n_early_epochs\": 250, \"rescale_tsne\": True}\n",
    "    ],\n",
    "    \"umap_embd\": [\n",
    "        {\"k\": 15, \"n_epochs\": 750, \"min_dist\": 0.1, \"metric\": \"euclidean\"},\n",
    "        {\"k\": 100, \"n_epochs\": 750, \"min_dist\": 0.1, \"metric\": \"euclidean\"},\n",
    "        {\"k\": 999, \"n_epochs\": 750, \"min_dist\": 0.1, \"metric\": \"euclidean\"},\n",
    "    ],\n",
    "    \"eff_res\": [\n",
    "        {\"corrected\": True, \"weighted\": False, \"k\": 15, \"disconnect\": True},\n",
    "        {\"corrected\": True, \"weighted\": False, \"k\": 100, \"disconnect\": True,}\n",
    "    ],\n",
    "    \"diffusion\": [\n",
    "        {\"k\": 15, \"t\": 8, \"kernel\": \"sknn\", \"include_self\": False},\n",
    "        {\"k\": 100, \"t\": 8, \"kernel\": \"sknn\", \"include_self\": False},\n",
    "        {\"k\": 15, \"t\": 64, \"kernel\": \"sknn\", \"include_self\": False},\n",
    "        {\"k\": 100, \"t\": 64, \"kernel\": \"sknn\", \"include_self\": False},\n",
    "    ],\n",
    "    \"spectral\": [\n",
    "        {\"k\": 15, \"normalization\": \"none\", \"n_evecs\": 2, \"weighted\": False},\n",
    "        {\"k\": 15, \"normalization\": \"none\", \"n_evecs\": 5, \"weighted\": False},\n",
    "        {\"k\": 15, \"normalization\": \"none\", \"n_evecs\": 10, \"weighted\": False},\n",
    "    ],\n",
    "}\n",
    "\n",
    "n = 1000\n",
    "sigmas = np.linspace(0.0, 0.4, 33)[:-4]\n",
    "sigmas = np.array([np.format_float_positional(sigma, precision=4, unique=True, trim='0') for sigma in sigmas]).astype(float)\n",
    "seeds = [0, 1, 2]"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e24ad21814854442"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "3d0703b7ef44fb14"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Circle"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "dc5bbb9f97ac1dd6"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Dataset name used by the load_multiple_res calls and the figure file names of this section.\n",
    "dataset = \"toy_circle\""
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5f62095692d5931b"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### d=50"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5daa097977be7490"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Load the results of every configured distance for embd_dims=50.\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e4729160ff6e1408"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res for dim=1 with threshold dob (binary mode).\n",
    "# dob is presumably a death/birth ratio threshold -- TODO confirm in utils.pd_utils.\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "82f5a27636ee230e"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores (and the accompanying best_aucs) for dim=1 with n_features=1.\n",
    "# n_features is presumably the number of features expected in the data -- TODO confirm.\n",
    "outlier_scores, best_aucs = get_outlier_scores_best_auc(all_res, dim=1, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f62ade69183ea46c"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Display the best_aucs entry stored under \"dtm\" for inspection.\n",
    "best_aucs[\"dtm\"]"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "9d3b3570aa954805"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Only keep the dtm runs with the best auc for each p_radius.\n",
    "# NOTE(review): filter_dtm_dists is neither imported nor defined in any visible cell of this\n",
    "# notebook; a fresh kernel would raise NameError here unless it is provided elsewhere --\n",
    "# confirm its module and add it to the import cell.\n",
    "# The return value is not assigned, so the call presumably edits outlier_scores in place.\n",
    "filter_dtm_dists(outlier_scores)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f56f88894f9998c9"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the scores of all methods over sigma for the filtered d=50 circle results.\n",
    "# NOTE(review): plot_many_dists is neither imported nor defined in any visible cell of this\n",
    "# notebook -- confirm its module and add it to the import cell.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "6b909f994685b6f2"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### d=2"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "67630a61113806cd"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "embd_dim = 2\n",
    "# k=15 gave convergence issues with the shortest path solver, so sknn_dist is restricted to\n",
    "# k=100 in this section. The override lives in a section-local copy: assigning into the\n",
    "# shared `distances` dict would silently drop k=15 from every section run after this cell\n",
    "# and make the notebook's results depend on cell execution order.\n",
    "distances_2d = {**distances, \"sknn_dist\": [{\"k\": 100}]}\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances_2d,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "ae6b300fd1272154"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res for dim=1 with threshold dob (binary mode).\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b3af7528146271bf"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores (and the accompanying best_aucs) for dim=1 with n_features=1.\n",
    "outlier_scores, best_aucs = get_outlier_scores_best_auc(all_res, dim=1, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8ac2d58edd36058a"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius.\n",
    "# The return value is not assigned, so the call presumably edits outlier_scores in place.\n",
    "filter_dtm_dists(outlier_scores)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "76376562f1d3c1c5"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the scores of all methods over sigma for the filtered d=2 circle results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "16e2200a6ebf4983"
  },
  {
   "cell_type": "markdown",
   "source": [
    "### d=50 no filtering"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8f13fd4bf9d27085"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Reload the embd_dims=50 results; this section skips the filter_dgms step.\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f7ae2b9205dbd2f3"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores (and the accompanying best_aucs) for dim=1 with n_features=1,\n",
    "# this time on the unfiltered all_res.\n",
    "outlier_scores, best_aucs = get_outlier_scores_best_auc(all_res, dim=1, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "1dd51d7d81f4be8c"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius.\n",
    "# The return value is not assigned, so the call presumably edits outlier_scores in place.\n",
    "filter_dtm_dists(outlier_scores)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7430db3205cf908d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the scores of all methods over sigma for the unfiltered d=50 circle results.\n",
    "# The file name carries no \"_dob\" suffix, unlike the filtered figures.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b09ed9960120780"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Eyeglasses"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "4bbdd198c22ba6f4"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Switch to the eyeglasses dataset at embd_dims=50.\n",
    "# Only the first 13 sigma values are loaded for this dataset.\n",
    "dataset = \"eyeglasses\"\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas[:13],\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e22b64dd540547a"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res for dim=1 with threshold dob (binary mode).\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "2bea26e0809ffa5"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores (and the accompanying best_aucs) for dim=1 with n_features=1.\n",
    "outlier_scores, best_aucs = get_outlier_scores_best_auc(all_res, dim=1, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "408b3c5437e26751"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius.\n",
    "# The return value is not assigned, so the call presumably edits outlier_scores in place.\n",
    "filter_dtm_dists(outlier_scores)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "71592be56978ee98"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the scores of all methods for the eyeglasses data, using the same first 13 sigma\n",
    "# values that were loaded above.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores,\n",
    "    sigmas=sigmas[:13],\n",
    "    ylabel=\"Loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "962214fc5967a5c9"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Linked circles"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "57127ebf2cc569b0"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Switch to the inter_circles dataset at embd_dims=50 and load the full sigma grid.\n",
    "dataset = \"inter_circles\"\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "19873b75bccf0b22"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res for dim=1 with threshold dob (binary mode).\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "22337ac3a3f7345c"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores (and the accompanying best_aucs) for dim=1 with n_features=2.\n",
    "# n_features=2 presumably reflects the two loops of this dataset -- TODO confirm.\n",
    "outlier_scores, best_aucs = get_outlier_scores_best_auc(all_res, dim=1, n_features=2)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "e00033c0d4b342f8"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius.\n",
    "# The return value is not assigned, so the call presumably edits outlier_scores in place.\n",
    "filter_dtm_dists(outlier_scores)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "2e40c697145f6eeb"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the scores of all methods over sigma for the filtered inter_circles results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"2 loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b7fa5b399191439d"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Sphere"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8a24f45faaf04666"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Switch to the toy_sphere dataset at embd_dims=50 and load the full sigma grid.\n",
    "dataset = \"toy_sphere\"\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "88e6ea616c6ce1ce"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res twice with the same threshold dob: first for dim=1, then for\n",
    "# dim=2 on the output of the first call.\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)\n",
    "all_res = filter_dgms(all_res, dim=2, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8557b6aed35ccc24"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores separately per dimension: the *_1 variables hold the dim=1\n",
    "# results, the *_2 variables the dim=2 results (n_features=1 in both cases).\n",
    "outlier_scores_1, best_aucs_1 = get_outlier_scores_best_auc(all_res, dim=1, n_features=1)\n",
    "outlier_scores_2, best_aucs_2 = get_outlier_scores_best_auc(all_res, dim=2, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "ad908cf515d0d469"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius, separately for the dim=1 and dim=2 scores.\n",
    "# The return values are not assigned, so the calls presumably edit their argument in place.\n",
    "filter_dtm_dists(outlier_scores_1)\n",
    "filter_dtm_dists(outlier_scores_2)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "b2c57a964e6f1d13"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the dim=1 scores of all methods over sigma for the filtered toy_sphere results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores_1,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_1D_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f2874a8cdab882e2"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the dim=2 scores of all methods over sigma for the filtered toy_sphere results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores_2,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Void detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_2D_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "53bb31199ac9dd6a"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Torus"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "482eb38545a73750"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Switch to the torus dataset at embd_dims=50 and load the full sigma grid.\n",
    "dataset = \"torus\"\n",
    "embd_dim = 50\n",
    "all_res = load_multiple_res(\n",
    "    datasets=dataset,\n",
    "    distances=distances,\n",
    "    root_path=root_path,\n",
    "    n=n,\n",
    "    embd_dims=embd_dim,\n",
    "    sigmas=sigmas,\n",
    "    seeds=seeds,\n",
    "    n_threads=10,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "7c7d2517c8f09631"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Apply filter_dgms to all_res twice with the same threshold dob: first for dim=1, then for\n",
    "# dim=2 on the output of the first call.\n",
    "# NOTE: all_res is overwritten, so re-running this cell filters its own previous output.\n",
    "dob = 1.25\n",
    "all_res = filter_dgms(all_res, dim=1, dob=dob, binary=True)\n",
    "all_res = filter_dgms(all_res, dim=2, dob=dob, binary=True)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "75e5527447e4ef5d"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Compute the outlier scores separately per dimension: dim=1 with n_features=2 and dim=2\n",
    "# with n_features=1.\n",
    "# The n_features values presumably match the torus' two loops and one void -- TODO confirm.\n",
    "outlier_scores_1, best_aucs_1 = get_outlier_scores_best_auc(all_res, dim=1, n_features=2)\n",
    "outlier_scores_2, best_aucs_2 = get_outlier_scores_best_auc(all_res, dim=2, n_features=1)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "5df0b018cf65a21f"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Delete all but the best dtm run for each p_radius, separately for the dim=1 and dim=2 scores.\n",
    "# The return values are not assigned, so the calls presumably edit their argument in place.\n",
    "filter_dtm_dists(outlier_scores_1)\n",
    "filter_dtm_dists(outlier_scores_2)"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "777840b82483f21c"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the dim=1 scores of all methods over sigma for the filtered torus results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores_1,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"2 loop detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_1D_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "95a48db8a14ed0df"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot the dim=2 scores of all methods over sigma for the filtered torus results.\n",
    "plot_many_dists(\n",
    "    outlier_scores=outlier_scores_2,\n",
    "    sigmas=sigmas,\n",
    "    ylabel=\"Void detection score\",\n",
    "    fig_path=fig_path,\n",
    "    fig_title=f\"fig_{dataset}_{embd_dim}_many_2D_dob.pdf\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "379550f3da438551"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "5080ab6117dbb0fe"
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "conda-env-ph-py",
   "language": "python",
   "display_name": "Python [conda env:ph]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
