{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b2c0817",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/etc/python/sitecustomize.py:117: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  mod = _original_import(name, globals, locals, fromlist, level)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IterableDataset({\n",
      "    features: ['image', 'label'],\n",
      "    num_shards: 14\n",
      "})\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "from torchvision import transforms, models\n",
    "from torch.utils.data import DataLoader, Dataset\n",
    "import torch, torch.nn as nn, numpy as np\n",
    "\n",
    "# streaming=True returns an IterableDataset (see the printed repr below).\n",
    "dataset_id = \"ILSVRC/imagenet-1k\"\n",
    "ds = load_dataset(dataset_id, split=\"validation\", streaming=True)\n",
    "print(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fb16d2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "782it [08:16,  1.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Embeddings: (50000, 2048)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import IterableDataset, DataLoader   # ✅ add IterableDataset here\n",
    "from torchvision import transforms, models\n",
    "from datasets import load_dataset\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Per-channel mean/std handed to Normalize after the image becomes a tensor.\n",
    "norm_mean = [0.485, 0.456, 0.406]\n",
    "norm_std = [0.229, 0.224, 0.225]\n",
    "\n",
    "# Resize -> centre crop -> tensor -> normalise, applied in this order.\n",
    "preprocess_steps = [\n",
    "    transforms.Resize(256),\n",
    "    transforms.CenterCrop(224),\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize(mean=norm_mean, std=norm_std),\n",
    "]\n",
    "transform = transforms.Compose(preprocess_steps)\n",
    "\n",
    "class StreamingImageNet(IterableDataset):\n",
    "    \"\"\"Iterable dataset yielding ``(image, label)`` pairs from a streamed source.\n",
    "\n",
    "    Args:\n",
    "        hf_ds: Iterable of mapping items with an ``\"image\"`` entry (an object\n",
    "            providing ``convert(\"RGB\")``) and a ``\"label\"`` entry.\n",
    "        transform: Optional callable applied to each converted image.\n",
    "        max_samples: Upper bound on the number of yielded samples;\n",
    "            ``None`` (default) iterates over the whole source.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, hf_ds, transform=None, max_samples=None):\n",
    "        self.ds = hf_ds\n",
    "        self.transform = transform\n",
    "        self.max_samples = max_samples\n",
    "\n",
    "    def __iter__(self):\n",
    "        for i, item in enumerate(self.ds):\n",
    "            # Stop before item ``max_samples`` so exactly that many are yielded;\n",
    "            # the previous post-yield ``i >= max_samples`` test let one extra through.\n",
    "            if self.max_samples is not None and i >= self.max_samples:\n",
    "                break\n",
    "            image = item[\"image\"].convert(\"RGB\")\n",
    "            label = item[\"label\"]\n",
    "            if self.transform:\n",
    "                image = self.transform(image)\n",
    "            yield image, label\n",
    "\n",
    "\n",
    "# Wrap the streamed split and batch it in the main process (num_workers=0).\n",
    "imagenet_val = StreamingImageNet(hf_ds=ds, transform=transform)\n",
    "dataloader = DataLoader(dataset=imagenet_val, batch_size=64, num_workers=0)\n",
    "\n",
    "\n",
    "# Pretrained ResNet-50 whose final fc layer is swapped for Identity, so the\n",
    "# forward pass returns the features that used to feed that layer.\n",
    "device_name = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "device = torch.device(device_name)\n",
    "resnet = models.resnet50(weights=\"IMAGENET1K_V1\")\n",
    "resnet.fc = nn.Identity()\n",
    "resnet.eval()  # inference mode for the module's layers\n",
    "resnet.to(device)\n",
    "\n",
    "# Extract embeddings\n",
    "def extract_embeddings(loader, model, device):\n",
    "    \"\"\"Run ``model`` over every batch from ``loader`` and gather the results.\n",
    "\n",
    "    Args:\n",
    "        loader: Iterable of ``(images, labels)`` tensor batches.\n",
    "        model: Callable applied to each image batch once it is on ``device``.\n",
    "        device: Device the image batches are moved to before the forward pass.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(features, labels)`` of NumPy arrays, each concatenated over\n",
    "        all batches along the first axis.\n",
    "    \"\"\"\n",
    "    feature_chunks = []\n",
    "    label_chunks = []\n",
    "    with torch.no_grad():  # no gradients are needed for feature extraction\n",
    "        for batch_images, batch_labels in tqdm(loader):\n",
    "            outputs = model(batch_images.to(device))\n",
    "            feature_chunks.append(outputs.cpu().numpy())\n",
    "            label_chunks.append(batch_labels.numpy())\n",
    "    features = np.concatenate(feature_chunks)\n",
    "    targets = np.concatenate(label_chunks)\n",
    "    return features, targets\n",
    "\n",
    "embeddings, labels = extract_embeddings(dataloader, resnet, device)\n",
    "# Persist the result: a later cell reloads \"embeddings.npy\", and the extraction\n",
    "# itself takes several minutes, so it should not have to be repeated.\n",
    "np.save(\"embeddings.npy\", embeddings)\n",
    "print(\"Embeddings:\", embeddings.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0ef301ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50000, 2048)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check on the (n_samples, feature_dim) shape of the extracted embeddings.\n",
    "embeddings.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cb5a3d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "from PLDiv_sparse import PLDiv_Sparse_MST, fast_PLDiv_approx\n",
    "import statistics\n",
    "import time\n",
    "from magnipy.diversipy import Diversipy\n",
    "from vendi_score import vendi\n",
    "import torch\n",
    "from sklearn.metrics.pairwise import rbf_kernel, laplacian_kernel, cosine_distances, euclidean_distances, manhattan_distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "39120067",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the embeddings from disk; expects \"embeddings.npy\" in the working directory.\n",
    "embeddings = np.load(\"embeddings.npy\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2a5547a7",
   "metadata": {},
   "source": [
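    "## Cosine Similarity\n",
    "\n",
    "The cells below time each diversity metric on cosine distances between the embeddings (and on the matching similarities, `1 - distance`)."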
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "66890115",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairwise cosine distances between all embeddings (n x n, so memory grows quadratically with n).\n",
    "dist_matrix = cosine_distances(embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dbe217af",
   "metadata": {},
   "outputs": [],
   "source": [
    "def dcscore(sim_product):\n",
    "    \"\"\"Sum of the diagonal of the row-wise softmax of a similarity matrix.\n",
    "\n",
    "    Args:\n",
    "        sim_product: 2-D floating-point similarity matrix, given as a NumPy\n",
    "            array, a torch tensor, or a nested sequence of floats.\n",
    "\n",
    "    Returns:\n",
    "        float: Sum over ``i`` of ``softmax(sim_product[i])[i]``.\n",
    "    \"\"\"\n",
    "    # as_tensor accepts arrays, tensors and sequences (from_numpy only takes\n",
    "    # ndarrays) and still avoids copying NumPy input.\n",
    "    sim_product = torch.as_tensor(sim_product)\n",
    "    sim_probs = sim_product.softmax(dim=-1)\n",
    "    diversity = torch.sum(torch.diag(sim_probs))\n",
    "    return diversity.item()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "616866ff",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5000, 5000)\n",
      "(5000, 5000)\n"
     ]
    }
   ],
   "source": [
    "# Benchmark on the leading 5000 x 5000 block of the distance matrix.\n",
    "n_sub = 5000\n",
    "dist_matrix_sub = dist_matrix[:n_sub, :n_sub]\n",
    "sim_matrix = 1 - dist_matrix_sub  # similarity counterpart of the distances\n",
    "for matrix in (dist_matrix_sub, sim_matrix):\n",
    "    print(matrix.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "eecb8edd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 5.402 seconds\n",
      "Run 2: 5.444 seconds\n",
      "Run 3: 5.440 seconds\n",
      "Run 4: 5.441 seconds\n",
      "Run 5: 5.445 seconds\n",
      "\n",
      "Average runtime: 5.43 ± 0.02 s\n",
      "\n",
      "Average Diverity: 46.51 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = fast_PLDiv_approx(dist_matrix_sub)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "fe132f56",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 3.961 seconds\n",
      "Run 2: 3.955 seconds\n",
      "Run 3: 3.927 seconds\n",
      "Run 4: 3.965 seconds\n",
      "Run 5: 4.030 seconds\n",
      "\n",
      "Average runtime: 3.97 ± 0.03 s\n",
      "\n",
      "Average Diverity: 46.52 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "550a8b7c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 2.606 seconds\n",
      "Run 2: 2.608 seconds\n",
      "Run 3: 2.614 seconds\n",
      "Run 4: 2.615 seconds\n",
      "Run 5: 2.619 seconds\n",
      "\n",
      "Average runtime: 2.61 ± 0.00 s\n",
      "\n",
      "Average Diverity: 47.32 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "a61a7cc2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 3.247 seconds\n",
      "Run 2: 1.130 seconds\n",
      "Run 3: 1.133 seconds\n",
      "Run 4: 1.279 seconds\n",
      "Run 5: 1.207 seconds\n",
      "\n",
      "Average runtime: 1.60 ± 0.83 s\n",
      "\n",
      "Average Diverity: 26.50 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    vs = vendi.score_K(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(vs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "804c14f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 0.070 seconds\n",
      "Run 2: 0.022 seconds\n",
      "Run 3: 0.022 seconds\n",
      "Run 4: 0.021 seconds\n",
      "Run 5: 0.023 seconds\n",
      "\n",
      "Average runtime: 0.03 ± 0.02 s\n",
      "\n",
      "Average Diverity: 1.60 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    dcs = dcscore(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(dcs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "f91aeab2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 203.488 seconds\n",
      "Run 2: 132.986 seconds\n",
      "Run 3: 144.664 seconds\n",
      "Run 4: 145.673 seconds\n",
      "Run 5: 197.732 seconds\n",
      "\n",
      "Average runtime: 164.91 ± 29.55 s\n",
      "\n",
      "Average Diverity: 1796.71 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    # The timed region covers both constructing Diversipy and computing the areas.\n",
    "    div = Diversipy(Xs = [dist_matrix_sub],  ref_space=0, metric=\"precomputed\")\n",
    "    mag_areas = div.MagAreas()\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(mag_areas)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "8cf9cd53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10000, 10000)\n",
      "(10000, 10000)\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): this recomputes the full distance matrix and then duplicates it\n",
    "# before slicing; confirm both steps are needed, since the slice is still a view of the copy.\n",
    "dist_matrix = cosine_distances(embeddings)\n",
    "dist_matrix_c = dist_matrix.copy()\n",
    "\n",
    "# Benchmark on the leading 10000 x 10000 block.\n",
    "n_sub = 10000\n",
    "dist_matrix_sub = dist_matrix_c[:n_sub, :n_sub]\n",
    "sim_matrix = 1 - dist_matrix_sub  # similarity counterpart of the distances\n",
    "for matrix in (dist_matrix_sub, sim_matrix):\n",
    "    print(matrix.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "6e4a8171",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 24.350 seconds\n",
      "Run 2: 24.255 seconds\n",
      "Run 3: 24.432 seconds\n",
      "Run 4: 24.208 seconds\n",
      "Run 5: 24.406 seconds\n",
      "\n",
      "Average runtime: 24.33 ± 0.09 s\n",
      "\n",
      "Average Diverity: 78.01 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = fast_PLDiv_approx(dist_matrix_sub)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "87770705",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 17.221 seconds\n",
      "Run 2: 17.249 seconds\n",
      "Run 3: 16.673 seconds\n",
      "Run 4: 16.442 seconds\n",
      "Run 5: 16.392 seconds\n",
      "\n",
      "Average runtime: 16.80 ± 0.37 s\n",
      "\n",
      "Average Diverity: 78.03 ± 0.01 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "c05f56b8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 9.851 seconds\n",
      "Run 2: 9.860 seconds\n",
      "Run 3: 9.798 seconds\n",
      "Run 4: 9.870 seconds\n",
      "Run 5: 9.956 seconds\n",
      "\n",
      "Average runtime: 9.87 ± 0.05 s\n",
      "\n",
      "Average Diverity: 79.70 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "9d30042d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 7.875 seconds\n",
      "Run 2: 8.002 seconds\n",
      "Run 3: 13.901 seconds\n",
      "Run 4: 14.091 seconds\n",
      "Run 5: 10.242 seconds\n",
      "\n",
      "Average runtime: 10.82 ± 2.73 s\n",
      "\n",
      "Average Diverity: 26.79 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    vs = vendi.score_K(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(vs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "b84a4ea9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 0.069 seconds\n",
      "Run 2: 0.071 seconds\n",
      "Run 3: 0.067 seconds\n",
      "Run 4: 0.061 seconds\n",
      "Run 5: 0.062 seconds\n",
      "\n",
      "Average runtime: 0.07 ± 0.00 s\n",
      "\n",
      "Average Diverity: 1.60 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    dcs = dcscore(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(dcs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "74838694",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 677.474 seconds\n",
      "Run 2: 687.035 seconds\n",
      "Run 3: 717.031 seconds\n",
      "Run 4: 761.756 seconds\n",
      "Run 5: 737.409 seconds\n",
      "\n",
      "Average runtime: 716.14 ± 31.23 s\n",
      "\n",
      "Average Diverity: 3642.29 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    # The timed region covers both constructing Diversipy and computing the areas.\n",
    "    div = Diversipy(Xs = [dist_matrix_sub],  ref_space=0, metric=\"precomputed\")\n",
    "    mag_areas = div.MagAreas()\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(mag_areas)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "b5d53eb7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20000, 20000)\n",
      "(20000, 20000)\n"
     ]
    }
   ],
   "source": [
    "# Benchmark on the leading 20000 x 20000 block of the copied distance matrix.\n",
    "n_sub = 20000\n",
    "dist_matrix_sub = dist_matrix_c[:n_sub, :n_sub]\n",
    "sim_matrix = 1 - dist_matrix_sub  # similarity counterpart of the distances\n",
    "for matrix in (dist_matrix_sub, sim_matrix):\n",
    "    print(matrix.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "2b212fd9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 105.915 seconds\n",
      "Run 2: 105.826 seconds\n",
      "Run 3: 104.936 seconds\n",
      "Run 4: 105.707 seconds\n",
      "Run 5: 105.700 seconds\n",
      "\n",
      "Average runtime: 105.62 ± 0.35 s\n",
      "\n",
      "Average Diverity: 133.55 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = fast_PLDiv_approx(dist_matrix_sub)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "7c0d9d8b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 71.818 seconds\n",
      "Run 2: 70.010 seconds\n",
      "Run 3: 68.473 seconds\n",
      "Run 4: 66.892 seconds\n",
      "Run 5: 65.557 seconds\n",
      "\n",
      "Average runtime: 68.55 ± 2.21 s\n",
      "\n",
      "Average Diverity: 133.58 ± 0.03 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "dc5429d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 33.740 seconds\n",
      "Run 2: 33.759 seconds\n",
      "Run 3: 33.740 seconds\n",
      "Run 4: 33.731 seconds\n",
      "Run 5: 33.727 seconds\n",
      "\n",
      "Average runtime: 33.74 ± 0.01 s\n",
      "\n",
      "Average Diverity: 136.86 ± 0.03 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "4219d822",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 165.765 seconds\n",
      "Run 2: 190.797 seconds\n",
      "Run 3: 170.915 seconds\n",
      "Run 4: 196.993 seconds\n",
      "Run 5: 194.541 seconds\n",
      "\n",
      "Average runtime: 183.80 ± 12.88 s\n",
      "\n",
      "Average Diverity: 26.89 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    vs = vendi.score_K(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(vs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "b7243f54",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 0.219 seconds\n",
      "Run 2: 0.228 seconds\n",
      "Run 3: 0.220 seconds\n",
      "Run 4: 0.218 seconds\n",
      "Run 5: 0.228 seconds\n",
      "\n",
      "Average runtime: 0.22 ± 0.00 s\n",
      "\n",
      "Average Diverity: 1.60 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    dcs = dcscore(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(dcs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5ed08b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(30000, 30000)\n",
      "(30000, 30000)\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): the full distance matrix is recomputed here; confirm this is\n",
    "# needed rather than reusing the matrix from the earlier cell.\n",
    "dist_matrix = cosine_distances(embeddings)\n",
    "\n",
    "# Benchmark on the leading 30000 x 30000 block.\n",
    "n_sub = 30000\n",
    "dist_matrix_sub = dist_matrix[:n_sub, :n_sub]\n",
    "sim_matrix = 1 - dist_matrix_sub  # similarity counterpart of the distances\n",
    "for matrix in (dist_matrix_sub, sim_matrix):\n",
    "    print(matrix.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "a47be6df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 237.205 seconds\n",
      "Run 2: 236.378 seconds\n",
      "Run 3: 234.966 seconds\n",
      "Run 4: 236.710 seconds\n",
      "Run 5: 235.884 seconds\n",
      "\n",
      "Average runtime: 236.23 ± 0.76 s\n",
      "\n",
      "Average Diverity: 184.93 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = fast_PLDiv_approx(dist_matrix_sub)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "d0d81458",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 157.103 seconds\n",
      "Run 2: 151.098 seconds\n",
      "Run 3: 148.060 seconds\n",
      "Run 4: 142.793 seconds\n",
      "Run 5: 138.366 seconds\n",
      "\n",
      "Average runtime: 147.48 ± 6.50 s\n",
      "\n",
      "Average Diverity: 184.92 ± 0.02 s\n"
     ]
    }
   ],
   "source": [
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter is a monotonic, high-resolution clock meant for measuring\n",
    "    # intervals; time.time() follows the adjustable wall clock.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# The diversity score is not a duration, so it is printed without the \"s\" suffix.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "d3a730c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 67.577 seconds\n",
      "Run 2: 67.867 seconds\n",
      "Run 3: 67.387 seconds\n",
      "Run 4: 69.484 seconds\n",
      "Run 5: 68.442 seconds\n",
      "\n",
      "Average runtime: 68.15 ± 0.76 s\n",
      "\n",
      "Average Diverity: 190.23 ± 0.06 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark PLDiv_Sparse_MST (sparse=10) on dist_matrix_sub over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "e382676b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(30000, 30000)\n",
      "(30000, 30000)\n"
     ]
    }
   ],
   "source": [
    "# Restrict the benchmark to the first 30000 rows and columns of the distance matrix.\n",
    "dist_matrix_sub = dist_matrix[:30000, :30000]\n",
    "# Convert the distances to similarities as 1 - d.\n",
    "sim_matrix = 1 - dist_matrix_sub\n",
    "# Show both shapes, one per line, as a sanity check.\n",
    "print(dist_matrix_sub.shape, sim_matrix.shape, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "a7c74edc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 775.922 seconds\n",
      "Run 2: 711.619 seconds\n",
      "Run 3: 714.985 seconds\n",
      "Run 4: 743.062 seconds\n",
      "Run 5: 786.946 seconds\n",
      "\n",
      "Average runtime: 746.51 ± 30.74 s\n",
      "\n",
      "Average Diverity: 26.94 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark vendi.score_K on sim_matrix over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    vs = vendi.score_K(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(vs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "01fca032",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 0.489 seconds\n",
      "Run 2: 0.463 seconds\n",
      "Run 3: 0.448 seconds\n",
      "Run 4: 0.465 seconds\n",
      "Run 5: 0.453 seconds\n",
      "\n",
      "Average runtime: 0.46 ± 0.01 s\n",
      "\n",
      "Average Diverity: 1.60 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark dcscore on sim_matrix over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    dcs = dcscore(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(dcs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5055f8be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Benchmark Diversipy's MagAreas() on the precomputed distance matrix over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    # Both the Diversipy construction and the MagAreas() call are inside the timed region.\n",
    "    div = Diversipy(Xs=[dist_matrix_sub], ref_space=0, metric=\"precomputed\")\n",
    "    mag_areas = div.MagAreas()\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(mag_areas)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "# NOTE(review): assumes MagAreas() returns something np.mean/np.std can reduce\n",
    "# to a single number -- confirm against the Diversipy API.\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "6863d2d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(40000, 40000)\n",
      "(40000, 40000)\n"
     ]
    }
   ],
   "source": [
    "# Compute the cosine-distance matrix of the embeddings.\n",
    "dist_matrix = cosine_distances(embeddings)\n",
    "# Restrict the benchmark to the first 40000 rows and columns.\n",
    "dist_matrix_sub = dist_matrix[:40000, :40000]\n",
    "# Convert the distances to similarities as 1 - d.\n",
    "sim_matrix = 1 - dist_matrix_sub\n",
    "# Show both shapes, one per line, as a sanity check.\n",
    "print(dist_matrix_sub.shape, sim_matrix.shape, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "6bb761e9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 463.522 seconds\n",
      "Run 2: 462.247 seconds\n",
      "Run 3: 462.063 seconds\n",
      "Run 4: 462.685 seconds\n",
      "Run 5: 463.241 seconds\n",
      "\n",
      "Average runtime: 462.75 ± 0.56 s\n",
      "\n",
      "Average Diverity: 232.89 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark fast_PLDiv_approx on dist_matrix_sub over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = fast_PLDiv_approx(dist_matrix_sub)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "54990e4a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 294.518 seconds\n",
      "Run 2: 284.087 seconds\n",
      "Run 3: 273.096 seconds\n",
      "Run 4: 263.410 seconds\n",
      "Run 5: 254.168 seconds\n",
      "\n",
      "Average runtime: 273.86 ± 14.35 s\n",
      "\n",
      "Average Diverity: 232.89 ± 0.06 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark PLDiv_Sparse_MST (sparse=0.95) on dist_matrix_sub over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=0.95)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "422af28a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 115.714 seconds\n",
      "Run 2: 115.105 seconds\n",
      "Run 3: 115.512 seconds\n",
      "Run 4: 115.571 seconds\n",
      "Run 5: 115.802 seconds\n",
      "\n",
      "Average runtime: 115.54 ± 0.24 s\n",
      "\n",
      "Average Diverity: 240.04 ± 0.27 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark PLDiv_Sparse_MST (sparse=10) on dist_matrix_sub over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    pldiv = PLDiv_Sparse_MST(dist_matrix_sub, sparse=10)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(pldiv)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "a3119105",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 1705.192 seconds\n",
      "Run 2: 1464.143 seconds\n",
      "Run 3: 1890.320 seconds\n",
      "Run 4: 1984.384 seconds\n",
      "Run 5: 1886.496 seconds\n",
      "\n",
      "Average runtime: 1786.11 ± 184.64 s\n",
      "\n",
      "Average Diverity: 26.96 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark vendi.score_K on sim_matrix over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    vs = vendi.score_K(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(vs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "afb39846",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run 1: 1.001 seconds\n",
      "Run 2: 0.920 seconds\n",
      "Run 3: 1.000 seconds\n",
      "Run 4: 0.907 seconds\n",
      "Run 5: 0.888 seconds\n",
      "\n",
      "Average runtime: 0.94 ± 0.05 s\n",
      "\n",
      "Average Diverity: 1.60 ± 0.00 s\n"
     ]
    }
   ],
   "source": [
    "# Benchmark dcscore on sim_matrix over five timed runs.\n",
    "runtimes = []\n",
    "div_runs = []\n",
    "\n",
    "for i in range(5):\n",
    "    # perf_counter() is the clock meant for interval timing; time.time() can jump\n",
    "    # when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "\n",
    "    dcs = dcscore(sim_matrix)\n",
    "\n",
    "    end_time = time.perf_counter()\n",
    "    runtime = end_time - start_time\n",
    "    runtimes.append(runtime)\n",
    "    div_runs.append(dcs)\n",
    "\n",
    "    print(f\"Run {i+1}: {runtime:.3f} seconds\")\n",
    "\n",
    "# Summary statistics across runs (np.std is the population standard deviation).\n",
    "mean_runtime = np.mean(runtimes)\n",
    "std_runtime = np.std(runtimes)\n",
    "mean_div = np.mean(div_runs)\n",
    "std_div = np.std(div_runs)\n",
    "\n",
    "print(f\"\\nAverage runtime: {mean_runtime:.2f} ± {std_runtime:.2f} s\")\n",
    "# div_runs holds metric values, not durations, so no seconds unit is printed.\n",
    "print(f\"\\nAverage Diversity: {mean_div:.2f} ± {std_div:.2f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py311",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
