{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# one dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from function import Kernel_rbf, choose_lam_r_lr, choose_lam_lr\n",
    "from function import Kernel_sobo, Kernel_laplace\n",
    "import scipy.stats as stats\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"define the mean regression function for 1-dimensional KRR (Example S1 in supplementary material)\"\"\"\n",
    "    # return np.exp(-1/(x**(2)))\n",
    "    return np.sin(15*x)\n",
    "\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"generate data from the mean regression function f (f_0)\"\"\"\n",
    "    x_train=np.sort(np.random.rand(n))\n",
    "    prob_tr=1/(1+np.exp(-f_0(x_train)))\n",
    "    y_train=np.random.binomial(1,p=prob_tr)\n",
    "    y_train[y_train==0]=-1\n",
    "    return x_train,y_train\n",
    "\n",
    "x_train,y_train = generate_data(200,f_true)\n",
    "y_true=f_true(x_train)\n",
    "\n",
    "\n",
    "#generate kernel matrix\n",
    "Gaussian_kernel_matrix = Kernel_rbf(x_train, x_train, sigma=1)\n",
    "Kernel_sobolev_first_order = Kernel_sobo(x_train, x_train)\n",
    "Laplace = Kernel_laplace(x_train, x_train)\n",
    "\n",
    "\n",
    "\n",
    "K = Kernel_sobolev_first_order\n",
    "# K = Laplace\n",
    "optimal_error_full = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=False, loss_type=\"one_zero\")\n",
    "optimal_error_trunc = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=True, loss_type=\"one_zero\")\n",
    "print(\"The optimal error for full kernel matrix is\", optimal_error_full)\n",
    "print(\"The optimal error for truncated kernel matrix is\", optimal_error_trunc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varing sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(1)\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "n_list = [200, 300]\n",
    "iter_num = 5\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i in range(len(n_list)):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        x_train, y_train = generate_data(n_list[i],  f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        mse_list[j, i, 0] = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=True, loss_type=\"one_zero\")\n",
    "        mse_list[j, i, 1] = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=False, loss_type=\"one_zero\")\n",
    "    mse_mean[i, 0] = np.mean(mse_list[:, i, 0])\n",
    "    mse_var[i, 0] = np.var(mse_list[:, i, 0])\n",
    "    mse_mean[i, 1] = np.mean(mse_list[:, i, 1])\n",
    "    mse_var[i, 1] = np.var(mse_list[:, i, 1])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import e\n",
    "from function import choose_lam, Kernel_sobo, choose_lam_lr\n",
    "import tqdm\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "from function import choose_lam_svm\n",
    "\n",
    "\n",
    "def f_1(x):\n",
    "    \"\"\"define the mean regression function for 1-dimensional KRR (Example S1 in supplementary material)\"\"\"\n",
    "    # return np.exp(-1/(x**(2)))\n",
    "    return np.sin(5*x)\n",
    "\n",
    "f_true = f_1\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"generate data from the mean regression function f (f_0)\"\"\"\n",
    "    x_train=np.sort(np.random.rand(n))\n",
    "    prob_tr=1/(1+np.exp(-f(x_train)))\n",
    "    y_train=np.random.binomial(1,p=prob_tr)\n",
    "    y_train[y_train==0]=-1\n",
    "    return x_train,y_train\n",
    "\n",
    "# Fix n=100\n",
    "n = 100\n",
    "r_list = 10**np.linspace(-2, 0, 20)\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "\n",
    "for j in tqdm.tqdm(range(iter_num)):\n",
    "    x_train, y_train = generate_data(n, f_1)\n",
    "    y_true = f_true(x_train)\n",
    "    K = Kernel_sobo(x_train, x_train)\n",
    "    U, s, V = np.linalg.svd(K)\n",
    "    for i in range(len(r_list)):\n",
    "        mse_list[j, i] = choose_lam_lr(K, x_train, y_train, y_true, \n",
    "                                r=int(r_list[i]*n), truncation=True, loss_type=\"one_zero\", pre_SVD=(U, s, V))\n",
    "for i in range(len(r_list)):\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \"mse mean=\", format(np.mean(mse_list[:, i]), '.3f'), \"mse var=\", format(np.var(mse_list[:, i]), '.3f'))\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# multidimension "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import Kernel_poly, choose_lam_r_lr, Kernel_rbf\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def f_m(x):\n",
    "    \"\"\"define the mean regression function for 3-dimensional KRR (Example S2 in supplementary material)\"\"\"\n",
    "    return np.sin(3*np.sum(x, axis=1))\n",
    "\n",
    "\n",
    "def Kernel_poly(x_1, x_2):\n",
    "    n= x_1.shape[0]\n",
    "    K = np.zeros([n,n])\n",
    "    for i in range(n):\n",
    "        for j in range(n):\n",
    "            K[i,j] = np.exp(-np.linalg.norm(x_1[i]-x_2[j], ord=1))\n",
    "    return K\n",
    "\n",
    "f_true = f_m\n",
    "\n",
    "def generate_data(n):\n",
    "    \"\"\"generate data from the mean regression function f (f_0)\"\"\"\n",
    "    x_train=np.random.rand(n, 3)\n",
    "    prob_tr=1/(1+np.exp(-f_true(x_train)))\n",
    "    y_train=np.random.binomial(1,p=prob_tr)\n",
    "    y_train[y_train==0]=-1\n",
    "    return x_train,y_train\n",
    "\n",
    "x_train,y_train=generate_data(200)\n",
    "y_true=f_true(x_train)\n",
    "\n",
    "\n",
    "K = Kernel_poly(x_train, x_train)\n",
    "optimal_error_full = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=False, loss_type=\"one_zero\")\n",
    "optimal_error_trunc = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=True, loss_type=\"one_zero\")\n",
    "print(\"The optimal error for full kernel matrix is\", optimal_error_full)\n",
    "print(\"The optimal error for truncated kernel matrix is\", optimal_error_trunc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varing sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(1)\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "n_list = [200,300]\n",
    "iter_num = 5\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i in range(len(n_list)):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        x_train, y_train = generate_data(n_list[i])\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_poly(x_train, x_train)\n",
    "        mse_list[j, i, 0] = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=True, loss_type=\"one_zero\")\n",
    "        mse_list[j, i, 1] = choose_lam_r_lr(K, x_train, y_train, y_true, truncation=False, loss_type=\"one_zero\")\n",
    "    mse_mean[i, 0] = np.mean(mse_list[:, i, 0])\n",
    "    mse_var[i, 0] = np.var(mse_list[:, i, 0])\n",
    "    mse_mean[i, 1] = np.mean(mse_list[:, i, 1])\n",
    "    mse_var[i, 1] = np.var(mse_list[:, i, 1])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import choose_lam, Kernel_sobo, choose_lam_lr\n",
    "import tqdm\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed()\n",
    "from function import choose_lam_svm\n",
    "\n",
    "# Fix n=100\n",
    "n = 100\n",
    "r_list = 10**np.linspace(-2, 0, 20)\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "\n",
    "for j in tqdm.tqdm(range(iter_num)):\n",
    "    x_train, y_train = generate_data(n)\n",
    "    y_true = f_true(x_train)\n",
    "    K = Kernel_poly(x_train, x_train)\n",
    "    U, s, V = np.linalg.svd(K)\n",
    "    for i in range(len(r_list)):\n",
    "        mse_list[j, i] = choose_lam_lr(K, x_train, y_train, y_true, \n",
    "                                r=int(r_list[i]*n), truncation=True, loss_type=\"one_zero\", pre_SVD=(U, s, V))\n",
    "for i in range(len(r_list)):\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \"mse mean=\", format(np.mean(mse_list[:, i]), '.3f'), \"mse var=\", format(np.var(mse_list[:, i]), '.3f'))\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "com",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
