{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f198f51c",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Kernel k-means\n",
    "\n",
    "### This is a naive implementation for the limited purposes of this paper.\n",
    "\n",
    "def kerneldist(Kmat, y, x):\n",
    "    \n",
    "    seq = np.arange(Kmat.shape[0])\n",
    "    n_clusters = np.arange(len(np.unique(y)))\n",
    "    x_dist = np.zeros(len(n_clusters))\n",
    "    \n",
    "    for cluster in n_clusters:\n",
    "        index = seq[y == cluster]\n",
    "        if len(index)==0:\n",
    "            x_dist[cluster] = 0\n",
    "        else:\n",
    "            x_dist[cluster] = (Kmat[x,x] + np.mean(Kmat[np.ix_(index,index)])-2*np.mean(Kmat[np.ix_(index,[x])]))\n",
    "    \n",
    "    return x_dist\n",
    "\n",
    "def kernelkmeanscost(Kmat,y):\n",
    "    \n",
    "    seq = np.arange(Kmat.shape[0])\n",
    "    n_clusters = np.arange(len(np.unique(y)))\n",
    "    costs = np.zeros(len(n_clusters))\n",
    "    \n",
    "    for cluster in n_clusters:\n",
    "        index = seq[y == cluster]\n",
    "        if len(index)==0:\n",
    "            costs[cluster]=0\n",
    "        else:\n",
    "            costs[cluster] = (np.sum(np.diag(Kmat[np.ix_(index,index)]))-(1/len(index))*np.sum(Kmat[np.ix_(index,index)]))\n",
    "\n",
    "    return(costs)\n",
    "\n",
    "def kernelkmeans(Kmat, n_clusters, algo, n_init = 10, n_iter = 100, silent = True):\n",
    "    \n",
    "    n_data = Kmat.shape[0]\n",
    "    best_y = np.zeros(n_data)\n",
    "    \n",
    "    if algo == 'kmeans':\n",
    "        Phi = Kmat\n",
    "        kmeans = KMeans(n_clusters=n_clusters, n_init=n_init)\n",
    "        kmeans.fit(Phi)\n",
    "        best_y = kmeans.predict(Phi)\n",
    "        centers = kmeans.cluster_centers_\n",
    "        \n",
    "        return(best_y, centers)\n",
    "        \n",
    "    elif algo=='kernelkmeans':\n",
    "        best_cost = float('inf')\n",
    "        \n",
    "        for t in np.arange(n_init):\n",
    "            ### initialize clusters\n",
    "            if silent == False:\n",
    "                print('Initialization #',t)\n",
    "            y = rng.choice(n_clusters, n_data)\n",
    "            converged = False\n",
    "            it = 0\n",
    "            \n",
    "            while converged == False:\n",
    "                ### assign to closest center\n",
    "                c = np.arange(n_data)\n",
    "                for x in range(n_data):\n",
    "                    c[x] = np.argmin(kerneldist(Kmat,y,x))\n",
    "                if np.array_equal(c,y)==True or it == n_iter:\n",
    "                    converged = True\n",
    "                    if silent == False:\n",
    "                        print('Converged at', it)\n",
    "                else:\n",
    "                    y = c\n",
    "                    it = it + 1\n",
    "                    \n",
    "            cost_t = np.sum(kernelkmeanscost(Kmat, y))\n",
    "            \n",
    "            if cost_t < best_cost:\n",
    "                best_y = y\n",
    "                best_cost = cost_t\n",
    "        \n",
    "        return(best_y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
