{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "461bdb5a-9062-4799-9a88-8903745c4e49",
   "metadata": {},
   "source": [
    "### Python packages used in this code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15d3693c-b977-4622-a602-45908a8d5bfd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import platform\n",
    "import sys\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import random\n",
    "import sklearn\n",
    "from sklearn.metrics.pairwise import pairwise_kernels, euclidean_distances\n",
    "from sklearn.gaussian_process.kernels import Matern\n",
    "import seaborn as sns\n",
    "import scipy\n",
    "from scipy.optimize import curve_fit, minimize\n",
    "from scipy.stats import ortho_group\n",
    "import time\n",
    "import warnings\n",
    "\n",
    "%matplotlib inline\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e08f032-d21c-4286-a2e9-7b0315001e28",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Environments\n",
    "\n",
    "--Platform--\n",
    "OS : Windows-10-10.0.19044-SP0\n",
    "--Version--\n",
    "python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]\n",
    "numpy : 1.23.1\n",
    "pandas : 1.4.3\n",
    "sklearn : 1.1.1\n",
    "scipy : 1.8.1\n",
    "\"\"\"\n",
    "\n",
    "print('--Platform--')\n",
    "print('OS :', platform.platform())\n",
    "print('--Version--')\n",
    "print('python : ', sys.version)\n",
    "print('numpy :', np.__version__)\n",
    "print('pandas :', pd.__version__)\n",
    "print('sklearn :', sklearn.__version__)\n",
    "print('scipy :', scipy.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef64f159-c3c5-4218-802f-43d3602432cf",
   "metadata": {},
   "source": [
    "# Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4430de72-c480-4c3b-8a23-fbad4fe17b7f",
   "metadata": {},
   "source": [
    "## fix_seed function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9140f5bd-a188-4c6c-9b5a-bcc5fcffdeda",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_seed(seed):\n",
    "    # Numpy\n",
    "    np.random.seed(seed)\n",
    "    # for built-in random\n",
    "    random.seed(seed)\n",
    "    # for hash seed\n",
    "    os.environ[\"PYTHONHASHSEED\"] = str(seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e80bb10-e82c-4ef7-8325-81a0f9ff992f",
   "metadata": {},
   "source": [
    "## Functions for caluculate dacey rates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1940b052-4ad8-43f0-969a-ba50133b9e56",
   "metadata": {},
   "outputs": [],
   "source": [
    "def decay_func(x,a,b):\n",
    "    \"\"\"\n",
    "    Decay rate function\n",
    "    \"\"\"\n",
    "    return  b * x**(-np.float64(a))\n",
    "\n",
    "def decay_loss(a, b ,x, y):\n",
    "    \"\"\"\n",
    "    Objective function to be optimized\n",
    "    \"\"\"\n",
    "    residual = decay_func(x,a,b)-y\n",
    "    loss = np.vectorize(base_loss)(residual)\n",
    "    return np.sum(loss)\n",
    "\n",
    "def base_loss(x):\n",
    "    \"\"\"\n",
    "    Loss function\n",
    "        In order to upper bound the eigenvalues, the curve is defined such that losses are higher when the curve is below the eigenvalues.\n",
    "    \"\"\"\n",
    "    if x < 0:\n",
    "        10**10\n",
    "    else:\n",
    "        return x**2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf883db4-7366-4412-bbe8-a099b892e779",
   "metadata": {},
   "source": [
    "## Create output directories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebdacb79-68e4-4ccd-afa0-70f64a373919",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.path.isdir('../30_Output/20_Plot/100_CheckEigenvalues/100_GramMatrix'):\n",
    "    os.makedirs('../30_Output/20_Plot/100_CheckEigenvalues/100_GramMatrix')\n",
    "if not os.path.isdir('../30_Output/20_Plot/100_CheckEigenvalues/110_Eigenvalue'):\n",
    "    os.makedirs('../30_Output/20_Plot/100_CheckEigenvalues/110_Eigenvalue')\n",
    "if not os.path.isdir('../30_Output/30_csv/100_CheckEigenvalues'):\n",
    "    os.makedirs('../30_Output/30_csv/100_CheckEigenvalues')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea66ffbb-9bb0-426f-ae71-4bd64912af6e",
   "metadata": {},
   "source": [
    "# Main codes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c8fd4e6-ff99-47e8-9774-72f53a99b0f4",
   "metadata": {},
   "source": [
    "## Setting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6fd0ee6-f443-4571-877c-09a560af6f1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 373\n",
    "n_samples = 100\n",
    "dim_x = 10\n",
    "n_Basis = 10\n",
    "n_BasisDupl_list = np.arange(0, 11)\n",
    "itr_list = np.arange(0, 100)\n",
    "kernel_list = ['linear', 0.5, 1.5, 2.5, np.inf]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8f79f63-5886-4974-9cf4-a877b850e53d",
   "metadata": {},
   "source": [
    "## Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28574ae9-8fc7-4719-b01d-1f3f2bdeb6eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_result = pd.DataFrame([], columns=['kernel_x', 'kernel_xs', 'BasisDupl', 'itr', 's_xs', 's_x', 's_xxs'])\n",
    "t0=time.time()\n",
    "# Loop for the kernel for k3\n",
    "for kernel_x in kernel_list:\n",
    "    # Loop for the kernel for k2\n",
    "    for kernel_xs in kernel_list:\n",
    "        # Loop for the number of the duplicated bases\n",
    "        for n_BasisDupl in n_BasisDupl_list:\n",
    "            t_start=time.time()\n",
    "            for itr in itr_list:\n",
    "                fix_seed(itr)\n",
    "                x = ortho_group.rvs(n_samples)\n",
    "                basis1 = x[:,:n_Basis]\n",
    "                basis2 = x[:,(n_Basis-n_BasisDupl):(2*n_Basis-n_BasisDupl)] \n",
    "\n",
    "                fix_seed(itr)\n",
    "                A1 = np.random.randn(n_Basis, dim_x)\n",
    "                A2 = np.random.randn(n_Basis, dim_x)\n",
    "                X = pd.DataFrame(basis1.dot(A1))\n",
    "                Xs = pd.DataFrame(basis2.dot(A2))\n",
    "                X = (X-X.mean())/X.std()\n",
    "                Xs = (Xs-Xs.mean())/Xs.std()\n",
    "\n",
    "                if kernel_x=='linear':\n",
    "                    gram_X = pairwise_kernels(X, X, metric='linear')/(2*dim_x)+1\n",
    "                else:\n",
    "                    gram_X = Matern(length_scale=np.sqrt(dim_x), nu=kernel_x)(X)\n",
    "                if kernel_xs=='linear':\n",
    "                    gram_Xs = pairwise_kernels(Xs, Xs, metric='linear')/(2*dim_x)+1\n",
    "                else:\n",
    "                    gram_Xs = Matern(length_scale=np.sqrt(dim_x), nu=kernel_xs)(Xs)\n",
    "                gram_XXs = gram_X * gram_Xs\n",
    "\n",
    "                # Eigenvelues of gram matrices\n",
    "                eig_x = np.sort(np.linalg.eigh(gram_X)[0].real)[::-1]\n",
    "                eig_xs = np.sort(np.linalg.eigh(gram_Xs)[0].real)[::-1]\n",
    "                eig_xxs = np.sort(np.linalg.eigh(gram_XXs)[0].real)[::-1]\n",
    "\n",
    "                # Estimate the optimal decay rate\n",
    "                i_vec = np.arange(n_samples)+1 \n",
    "                result_opt_x = minimize(decay_loss, [1], args=(np.sum(np.diag(gram_X)), i_vec, eig_x), method='Nelder-Mead', options={'maxiter':1e+5})\n",
    "                result_opt_xs = minimize(decay_loss, [1], args=(np.sum(np.diag(gram_Xs)), i_vec, eig_xs), method='Nelder-Mead', options={'maxiter':1e+5})\n",
    "                result_opt_xxs = minimize(decay_loss, [1], args=(np.sum(np.diag(gram_XXs)), i_vec, eig_xxs), method='Nelder-Mead', options={'maxiter':1e+5})\n",
    "\n",
    "                # Curve for plot\n",
    "                i_vec_plt = np.linspace(1, n_samples, 1000)\n",
    "                decay_x = decay_func(x=i_vec_plt, a=result_opt_x.x[0], b=np.sum(np.diag(gram_X)))\n",
    "                decay_xs = decay_func(x=i_vec_plt, a=result_opt_xs.x[0], b=np.sum(np.diag(gram_Xs)))\n",
    "                decay_xxs = decay_func(x=i_vec_plt, a=result_opt_xxs.x[0], b=np.sum(np.diag(gram_XXs)))\n",
    "\n",
    "                # Save\n",
    "                df_result.loc[str(kernel_x)+'-'+str(kernel_xs)+'-b'+str(n_BasisDupl)+'-i'+str(itr)] = [kernel_x, kernel_xs, n_BasisDupl, itr, 1/result_opt_xs.x[0], 1/result_opt_x.x[0], 1/result_opt_xxs.x[0]]\n",
    "\n",
    "                if itr == itr_list[-1]:\n",
    "                    print('\\r'+'kernel_x :', kernel_x, ',  kernel_xs :', kernel_xs, '  n_BasisDupl :', n_BasisDupl, ':   ',str(itr+1)+'/'+str(len(itr_list)), '   ('+str(round(time.time()-t_start, 1))+'s)')\n",
    "                elif itr == itr_list[0]:\n",
    "                    print('\\r'+'kernel_x :', kernel_x, ',  kernel_xs :', kernel_xs, '  n_BasisDupl :', n_BasisDupl, ':   ',str(itr+1)+'/'+str(len(itr_list)), end='')\n",
    "                df_result.to_csv('../30_Output/30_csv/100_CheckEigenvalues/100_Results.csv')\n",
    "df_result.to_csv('../30_Output/30_csv/100_CheckEigenvalues/100_Results.csv')\n",
    "print('')\n",
    "print('*** Success ***', '   ('+str(round(time.time()-t0, 1))+'s)')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
