{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### One-dimensional data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from function import Kernel_rbf, KRR_estimation\n",
    "from function import Kernel_sobo, choose_lam_r, Kernel_laplace\n",
    "np.random.seed(0)\n",
    "import tqdm\n",
    "\n",
    "\n",
    "# generate underlying true functions and data\n",
    "def f_0(x):\n",
    "    \"\"\"Mean regression function of the 1-dimensional KRR experiment.\n",
    "\n",
    "    Returns sin(10 * x) evaluated element-wise (Example S1 in supplementary material).\n",
    "    \"\"\"\n",
    "    # Disabled alternative target: np.exp(-1/(x**(2)))\n",
    "    scaled_input = 10 * x\n",
    "    return np.sin(scaled_input)\n",
    "\n",
    "\n",
    "# generate data\n",
    "f_true = f_0\n",
    "def generate_data(n,d,f):\n",
    "    \"\"\"Draw n sorted N(0, 1) inputs and noisy responses around f.\n",
    "\n",
    "    `d` is accepted but not used: the inputs drawn here are always one-dimensional.\n",
    "    Returns the pair (x_train, y_train) with y_train = f(x_train) + N(0, 1) noise.\n",
    "    \"\"\"\n",
    "    draws = np.random.normal(0, 1, size=(n))\n",
    "    design = np.sort(draws)\n",
    "    # Evaluate f before drawing the noise so the random stream is consumed in the same order\n",
    "    signal = f(design)\n",
    "    noise = np.random.normal(0, 1, n)\n",
    "    return design, signal + noise\n",
    "\n",
    "# One draw of 100 one-dimensional samples and the noiseless targets at the same inputs\n",
    "x_train, y_train = generate_data(100, 1, f_true)\n",
    "y_true = f_true(x_train)\n",
    "\n",
    "\n",
    "# Kernel matrices of the training inputs against themselves, built by the imported helpers\n",
    "Gaussian_kernel_matrix = Kernel_rbf(x_train, x_train, sigma=1)\n",
    "Kernel_sobolev_first_order = Kernel_sobo(x_train, x_train)\n",
    "Laplace = Kernel_laplace(x_train, x_train)\n",
    "\n",
    "\n",
    "# Manual sweep over lambda.\n",
    "# error_list[:, 0]: truncation=False, error_list[:, 1]: truncation=True with r=10\n",
    "K = Laplace\n",
    "# K = Gaussian_kernel_matrix\n",
    "lam_list = np.linspace(0.0001,10,100)\n",
    "error_list = np.zeros([len(lam_list), 2])\n",
    "for i, lam in enumerate(lam_list):\n",
    "    result = KRR_estimation(K, y_train, lam, truncation=False, r=None)\n",
    "    error_list[i,0] = np.linalg.norm(result-y_true)\n",
    "    result = KRR_estimation(K, y_train, lam, truncation=True, r=10)\n",
    "    error_list[i,1] = np.linalg.norm(result-y_true)\n",
    "# Smallest l2 error found on the lambda grid. These used to be stored in\n",
    "# optimal_error_full / optimal_error_trunc and were overwritten right away by the\n",
    "# choose_lam_r calls below; they now live under their own names.\n",
    "grid_error_full = error_list[:, 0].min()\n",
    "grid_error_trunc = error_list[:, 1].min()\n",
    "\n",
    "# Values returned by choose_lam_r are the ones reported\n",
    "optimal_error_full = choose_lam_r(K, y_train, y_true, truncation=False)\n",
    "optimal_error_trunc = choose_lam_r(K, y_train, y_true, truncation=True)\n",
    "print(\"The optimal error for full kernel matrix is\", optimal_error_full)\n",
    "print(\"The optimal error for truncated kernel matrix is\", optimal_error_trunc)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample sizes to compare and number of repetitions per sample size\n",
    "n_list = [200, 300]\n",
    "iter_num = 5\n",
    "\n",
    "# Last axis of the result arrays: 0 = truncation=True, 1 = truncation=False\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_samples in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh data set for every repetition\n",
    "        x_train, y_train = generate_data(n_samples, 1, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        mse_list[j, i, 0] = choose_lam_r(K, y_train, y_true, truncation=True)\n",
    "        mse_list[j, i, 1] = choose_lam_r(K, y_train, y_true, truncation=False)\n",
    "    # Mean and variance over the repetitions, one column per method\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_samples, \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_samples, \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import choose_lam\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is fixed at 500 while the truncation ratio varies\n",
    "n = 500\n",
    "\n",
    "# 20 log-spaced ratios between 10**-2.4 and 1; each is multiplied by the sample size to get r\n",
    "r_list = 10**np.linspace(-2.4, 0, 20)\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, ratio in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, 1, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # int() truncates the fractional part of ratio * number of samples\n",
    "        rank = int(ratio*x_train.shape[0])\n",
    "        mse_list[j, i] = choose_lam(K, y_train, y_true, r=rank, truncation=True)\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(ratio, '.3f'), \"mse mean=\", format(mse_mean[i], '.3f'), \"mse var=\", format(mse_var[i], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Multi-dimensional data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import Kernel_poly, choose_lam_r, Kernel_rbf\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def f_m(x):\n",
    "    \"\"\"Mean regression function of the multi-dimensional KRR experiment (Example S2 in supplementary material).\n",
    "\n",
    "    Sums each row of x over axis 1 and returns sin(2 * row_sum), one value per row.\n",
    "    \"\"\"\n",
    "    row_sum = np.sum(x, axis=1)\n",
    "    return np.sin(2*row_sum)\n",
    "\n",
    "\n",
    "def Kernel_poly(x_1, x_2):\n",
    "    \"\"\"Kernel matrix with entries K[i, j] = exp(-||x_1[i] - x_2[j]||_1).\n",
    "\n",
    "    Despite the name this is a Laplace-type (l1 distance) kernel, not a polynomial one;\n",
    "    the name is kept because other cells call it. This definition shadows the\n",
    "    Kernel_poly imported from `function` at the top of the cell.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x_1 : array of shape (n, d)\n",
    "    x_2 : array of shape (m, d)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ndarray of shape (n, m). The previous version always allocated (n, n), which\n",
    "    was only correct when x_2 had as many rows as x_1.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError if either input is not 2-dimensional.\n",
    "    \"\"\"\n",
    "    x_1 = np.asarray(x_1)\n",
    "    x_2 = np.asarray(x_2)\n",
    "    if x_1.ndim != 2 or x_2.ndim != 2:\n",
    "        raise ValueError(\"Kernel_poly expects 2-D arrays of shape (n_samples, n_features)\")\n",
    "    # Broadcast to all (n, m, d) pairwise differences; the l1 norm is the sum of\n",
    "    # absolute differences along the feature axis.\n",
    "    l1_dist = np.abs(x_1[:, None, :] - x_2[None, :, :]).sum(axis=-1)\n",
    "    return np.exp(-l1_dist)\n",
    "\n",
    "f_true = f_m\n",
    "def generate_data(n,d,f):\n",
    "    \"\"\"Draw an (n, d) standard-normal design and noisy responses around f.\n",
    "\n",
    "    Returns the pair (x_train, y_train) where y_train = f(x_train) plus n Gaussian\n",
    "    noise draws with scale 0.5.\n",
    "    \"\"\"\n",
    "    design = np.random.normal(0, 1, size=(n, d))\n",
    "    # Evaluate f before drawing the noise so the random stream is consumed in the same order\n",
    "    signal = f(design)\n",
    "    noise = np.random.normal(0, 0.5, size=(n))\n",
    "    return design, signal + noise\n",
    "\n",
    "# Draw 200 samples with 3 features and evaluate the noiseless regression function on them\n",
    "x_train,y_train=generate_data(200,3,f_true)\n",
    "y_true=f_true(x_train)\n",
    "\n",
    "\n",
    "# Kernel matrix of the training inputs against themselves (Kernel_poly as defined in this cell)\n",
    "K = Kernel_poly(x_train, x_train)\n",
    "# choose_lam_r is imported from `function`; its return value is what gets reported as the optimal error\n",
    "optimal_error_full = choose_lam_r(K, y_train, y_true, truncation=False)\n",
    "optimal_error_trunc = choose_lam_r(K, y_train, y_true, truncation=True)\n",
    "print(\"The optimal error for full kernel matrix is\", optimal_error_full)\n",
    "print(\"The optimal error for truncated kernel matrix is\", optimal_error_trunc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample sizes to compare and number of repetitions per sample size\n",
    "n_list = [200, 300]\n",
    "iter_num = 5\n",
    "\n",
    "# Last axis of the result arrays: 0 = truncation=True, 1 = truncation=False\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_samples in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh 3-feature data set for every repetition\n",
    "        x_train, y_train = generate_data(n_samples, 3, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_poly(x_train, x_train)\n",
    "        mse_list[j, i, 0] = choose_lam_r(K, y_train, y_true, truncation=True)\n",
    "        mse_list[j, i, 1] = choose_lam_r(K, y_train, y_true, truncation=False)\n",
    "    # Mean and variance over the repetitions, one column per method\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_samples, \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_samples, \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import choose_lam\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is fixed at 500 while the truncation ratio varies\n",
    "n = 500\n",
    "# NOTE(review): K here is whatever an earlier cell left behind, and U, s, V are not\n",
    "# read by any cell visible in this notebook - confirm whether this SVD is still needed.\n",
    "U, s, V = np.linalg.svd(K)\n",
    "\n",
    "# 30 log-spaced ratios between 10**-2.4 and 1; each is multiplied by the sample size to get r\n",
    "r_list = 10**np.linspace(-2.4, 0, 30)\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, ratio in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, 3, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_poly(x_train, x_train)\n",
    "        # int() truncates the fractional part of ratio * number of samples\n",
    "        rank = int(ratio*x_train.shape[0])\n",
    "        mse_list[j, i] = choose_lam(K, y_train, y_true, r=rank, truncation=True)\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(ratio, '.3f'), \"mse mean=\", format(mse_mean[i], '.3f'), \"mse var=\", format(mse_var[i], '.3f'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorch",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
