{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true,
    "tags": []
   },
   "source": [
    "# one dimension"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss=l2, $\\tau$=0.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from function import Kernel_rbf, choose_lam_r_quantile, choose_lam_quantile\n",
    "from function import Kernel_sobo, Kernel_laplace\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.3\n",
    "f_true = f_0\n",
    "\n",
    "\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "# Seed the global NumPy RNG so this cell's sample is reproducible, like the later cells.\n",
    "np.random.seed(0)\n",
    "x_train, y_train = generate_data(200, f_true)\n",
    "y_true = f_true(x_train)\n",
    "\n",
    "# Candidate kernel matrices on the training design; only the one assigned to K is used.\n",
    "Gaussian_kernel_matrix = Kernel_rbf(x_train, x_train, sigma=1)\n",
    "Sobolev = Kernel_sobo(x_train, x_train)\n",
    "Laplace = Kernel_laplace(x_train, x_train)\n",
    "\n",
    "K = Sobolev\n",
    "# Same data and kernel, evaluated without and with truncation.\n",
    "optimal_error_full = choose_lam_r_quantile(K, y_train, y_true, truncation=False, tau=tau, loss_type=\"l2\")\n",
    "optimal_error_trunc = choose_lam_r_quantile(K, y_train, y_true, truncation=True, tau=tau, loss_type=\"l2\")\n",
    "print(\"The optimal error for full kernel matrix is\", optimal_error_full)\n",
    "print(\"The optimal error for truncated kernel matrix is\", optimal_error_trunc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"l2\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "tau = 0.3\n",
    "\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"l2\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss=l2, $\\tau$=0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from function import Kernel_sobo, choose_lam_r_quantile\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.5\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"l2\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"l2\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss=l2, $\\tau$=0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from function import Kernel_sobo, choose_lam_r_quantile\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.7\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "# Smaller quick-check alternative: n_list = [20, 50]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"l2\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from function import Kernel_sobo, choose_lam_quantile\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"l2\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss=excess_risk, $\\tau=0.3$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from function import Kernel_sobo, choose_lam_r_quantile\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.3\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "# Full-scale alternative: n_list = [200, 500, 1000, 1500, 2000]\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"excess_risk\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from function import Kernel_sobo, choose_lam_quantile\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"excess_risk\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss = excess_risk, $\\tau=0.5$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from function import Kernel_sobo, choose_lam_r_quantile\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.5\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "# lam_list = np.logspace(-3, 3, 10)\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"excess_risk\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from function import Kernel_sobo, choose_lam_quantile\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"excess_risk\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## loss=excess_risk, $\\tau$=0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from function import Kernel_sobo, choose_lam_r_quantile\n",
    "import numpy as np\n",
    "import scipy.stats as stats\n",
    "import tqdm\n",
    "# plot image of mse for fixed r and varying sample size\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the one-dimensional experiments: sin(10 * x), element-wise.\n",
    "\n",
    "    Referenced as Example S1 in the supplementary material.\n",
    "    \"\"\"\n",
    "    # Alternative target kept for reference: np.exp(-1/(x**(2)))\n",
    "    angle = 10 * x\n",
    "    return np.sin(angle)\n",
    "\n",
    "sd = 3\n",
    "tau = 0.7\n",
    "f_true = f_0\n",
    "\n",
    "def generate_data(n, f):\n",
    "    \"\"\"Simulate n observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Design points are drawn uniformly on [0, 1) and returned in increasing order.\n",
    "    Gaussian noise with scale `sd` is shifted by its own tau-quantile, so the\n",
    "    tau-quantile of the shifted noise is zero. `sd` and `tau` are read from the\n",
    "    notebook's global namespace at call time.\n",
    "\n",
    "    Returns the tuple (x_train, y_train).\n",
    "    \"\"\"\n",
    "    x_train = np.sort(np.random.rand(n))\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    quantile_shift = stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    y_train = signal + noise - quantile_shift\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "# Sample sizes to compare and Monte Carlo repetitions per size.\n",
    "n_list = [200, 300]\n",
    "# Smaller quick-check alternative: n_list = [20, 50]\n",
    "iter_num = 2\n",
    "# Last axis of the result arrays: index 0 = truncation=True, index 1 = truncation=False.\n",
    "mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "mse_mean = np.zeros([len(n_list), 2])\n",
    "mse_var = np.zeros([len(n_list), 2])\n",
    "\n",
    "for i, n_i in enumerate(n_list):\n",
    "    for j in tqdm.tqdm(range(iter_num)):\n",
    "        # Fresh sample and kernel matrix for every repetition.\n",
    "        x_train, y_train = generate_data(n_i, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        for col, use_truncation in enumerate((True, False)):\n",
    "            mse_list[j, i, col] = choose_lam_r_quantile(\n",
    "                K, y_train, y_true, truncation=use_truncation, tau=tau, loss_type=\"excess_risk\"\n",
    "            )\n",
    "    # Summarise the repetitions for this sample size.\n",
    "    for col in range(2):\n",
    "        mse_mean[i, col] = np.mean(mse_list[:, i, col])\n",
    "        mse_var[i, col] = np.var(mse_list[:, i, col])\n",
    "    print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "    print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from function import Kernel_sobo, choose_lam_quantile\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(0)\n",
    "\n",
    "# Sample size is held fixed at 500 while the fraction r varies.\n",
    "n = 500\n",
    "# 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "r_list = np.power(10, np.linspace(-2.4, 0, 20))\n",
    "iter_num = 2\n",
    "mse_list = np.zeros([iter_num, len(r_list)])\n",
    "mse_mean = np.zeros([len(r_list)])\n",
    "mse_var = np.zeros([len(r_list)])\n",
    "\n",
    "for i, r_frac in enumerate(r_list):\n",
    "    for j in range(iter_num):\n",
    "        x_train, y_train = generate_data(n, f_true)\n",
    "        y_true = f_true(x_train)\n",
    "        K = Kernel_sobo(x_train, x_train)\n",
    "        # Convert the fraction of the sample size into an integer r of at least 1.\n",
    "        r_int = np.max([1, int(r_frac*x_train.shape[0])])\n",
    "        mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, r=r_int, truncation=True, tau=tau, loss_type=\"excess_risk\")\n",
    "    mse_mean[i] = np.mean(mse_list[:, i])\n",
    "    mse_var[i] = np.var(mse_list[:, i])\n",
    "    print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# multi dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from function import Kernel_poly\n",
    "import numpy as np\n",
    "from function import Kernel_rbf, choose_lam_r_quantile, choose_lam_quantile\n",
    "from function import Kernel_sobo, Kernel_laplace\n",
    "import scipy.stats as stats\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import matplotlib.pyplot as plt\n",
    "import tqdm\n",
    "\n",
    "np.random.seed(0)\n",
    "\n",
    "def f_0(x):\n",
    "    \"\"\"Target function for the multi-dimensional experiments: sin(2 * row sum of x).\n",
    "\n",
    "    `x` is reduced along axis 1, so it must be at least two-dimensional; one value\n",
    "    is returned per row. Referenced as Example S2 in the supplementary material.\n",
    "    \"\"\"\n",
    "    row_totals = np.sum(x, axis=1)\n",
    "    return np.sin(2 * row_totals)\n",
    "\n",
    "\n",
    "def Kernel_poly(x_1, x_2):\n",
    "    \"\"\"Exponential L1-distance kernel matrix between two sets of points.\n",
    "\n",
    "    Entry (i, j) is exp(-||x_1[i] - x_2[j]||_1). Despite the name this is not a\n",
    "    polynomial kernel, and this definition shadows the `Kernel_poly` imported\n",
    "    from `function` at the top of the cell.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x_1 : array-like of shape (n, d)\n",
    "    x_2 : array-like of shape (m, d)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray of shape (n, m)\n",
    "        The previous loop sized the result (n, n) from `x_1` alone, which was only\n",
    "        correct when both inputs had the same number of rows.\n",
    "    \"\"\"\n",
    "    x_1 = np.asarray(x_1)\n",
    "    x_2 = np.asarray(x_2)\n",
    "    # Broadcast to all pairwise coordinate differences, shape (n, m, d), instead of\n",
    "    # an n*m Python loop; the temporary holds n*m*d floats.\n",
    "    abs_diff = np.abs(x_1[:, None, :] - x_2[None, :, :])\n",
    "    return np.exp(-abs_diff.sum(axis=2))\n",
    "\n",
    "f_true = f_0\n",
    "# Noise standard deviation used by generate_data; defined before the function that reads it.\n",
    "sd = 3\n",
    "\n",
    "\n",
    "def generate_data(n, f, tau):\n",
    "    \"\"\"Simulate n three-dimensional observations whose conditional tau-quantile is f(x).\n",
    "\n",
    "    Covariates are drawn uniformly on [0, 1)^3. They are deliberately NOT sorted:\n",
    "    np.sort on an (n, 3) array orders the coordinates inside each row, which would\n",
    "    confine every point to x1 <= x2 <= x3 instead of the whole cube.\n",
    "\n",
    "    Gaussian noise with scale `sd` (notebook global) is shifted by its own\n",
    "    tau-quantile, so the tau-quantile of the shifted noise is zero.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n : int\n",
    "        Number of observations.\n",
    "    f : callable\n",
    "        Called with the (n, 3) covariate array; its result is added to a length-n noise vector.\n",
    "    tau : float\n",
    "        Quantile level passed to stats.norm.ppf.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (x_train, y_train) with shapes (n, 3) and (n,).\n",
    "    \"\"\"\n",
    "    x_train = np.random.rand(n, 3)\n",
    "    signal = f(x_train)\n",
    "    noise = np.random.normal(0, sd, n)\n",
    "    y_train = signal + noise - stats.norm.ppf(tau, loc=0, scale=sd)\n",
    "    return x_train, y_train\n",
    "\n",
    "\n",
    "\n",
    "import os  # local to this script block: only needed to create the output folders\n",
    "\n",
    "# For every quantile level and loss, run a sample-size sweep and an r sweep and save the raw results.\n",
    "for cha_tau in [\"3\", \"5\", \"7\"]:\n",
    "    for cha_loss in [\"loss_l2\", \"loss_risk\"]:\n",
    "        print(\"--------------------------------------------------\")\n",
    "        tau = float(cha_tau)/10\n",
    "        print(cha_tau, cha_loss)\n",
    "        if cha_loss==\"loss_l2\":\n",
    "            loss = \"l2\"\n",
    "        else:\n",
    "            loss = \"excess_risk\"\n",
    "\n",
    "        # np.save does not create missing folders. Create the target folder before\n",
    "        # simulating so a long run cannot fail at save time.\n",
    "        out_dir = \"./multi_data/kqr/\"+cha_loss+\"/tau\"+cha_tau\n",
    "        os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "        # ---- Sweep 1: error versus sample size ----\n",
    "        n_list = [200, 500, 1000, 1500, 2000]\n",
    "        iter_num = 50\n",
    "        # Last axis: index 0 = truncation=True, index 1 = truncation=False.\n",
    "        mse_list = np.zeros([iter_num, len(n_list), 2])\n",
    "        mse_mean = np.zeros([len(n_list), 2])\n",
    "        mse_var = np.zeros([len(n_list), 2])\n",
    "        for i in range(len(n_list)):\n",
    "            for j in tqdm.tqdm(range(iter_num)):\n",
    "                x_train, y_train = generate_data(n_list[i],  f_true, tau=tau)\n",
    "                y_true = f_true(x_train)\n",
    "                K = Kernel_poly(x_train, x_train)\n",
    "                mse_list[j, i, 0] = choose_lam_r_quantile(K, y_train, y_true, truncation=True, tau=tau, loss_type=loss)\n",
    "                mse_list[j, i, 1] = choose_lam_r_quantile(K, y_train, y_true, truncation=False, tau=tau, loss_type=loss)\n",
    "            mse_mean[i, 0] = np.mean(mse_list[:, i, 0])\n",
    "            mse_var[i, 0] = np.var(mse_list[:, i, 0])\n",
    "            mse_mean[i, 1] = np.mean(mse_list[:, i, 1])\n",
    "            mse_var[i, 1] = np.var(mse_list[:, i, 1])\n",
    "            print(\"n=\", n_list[i], \",truncated mean mse=\", format(mse_mean[i, 0], '.3f'),  \",full mean mse=\", format(mse_mean[i, 1], '.3f'))\n",
    "            print(\"n=\", n_list[i], \",truncated var mse=\", format(mse_var[i, 0], '.3f'),  \",full var mse=\", format(mse_var[i, 1], '.3f'))\n",
    "\n",
    "        np.save(out_dir+\"/n_mse_list.npy\", mse_list)\n",
    "        np.save(out_dir+\"/n_list.npy\", n_list)\n",
    "\n",
    "        # ---- Sweep 2: error versus r at a fixed sample size of 500 ----\n",
    "        n = 500\n",
    "        iter_num = 50\n",
    "        # 20 log-spaced fractions between 10**-2.4 and 1.\n",
    "        r_list = 10**np.linspace(-2.4, 0, 20)\n",
    "        mse_list = np.zeros([iter_num, len(r_list)])\n",
    "        mse_mean = np.zeros([len(r_list)])\n",
    "        mse_var = np.zeros([len(r_list)])\n",
    "        for i in range(len(r_list)):\n",
    "            for j in range(iter_num):\n",
    "                x_train, y_train = generate_data(n, f_true, tau=tau)\n",
    "                y_true = f_true(x_train)\n",
    "                K = Kernel_poly(x_train, x_train)\n",
    "                # r is the fraction scaled by the sample size, floored to an int, at least 1.\n",
    "                mse_list[j, i] = choose_lam_quantile(K, y_train, y_true, \n",
    "                                        r=np.max([1, int(r_list[i]*x_train.shape[0])]), truncation=True, tau=tau, loss_type=loss)\n",
    "            mse_mean[i] = np.mean(mse_list[:, i])\n",
    "            mse_var[i] = np.var(mse_list[:, i])\n",
    "            # Report only every fifth r to keep the log short.\n",
    "            if i%5==0:\n",
    "                print(\"r=\", format(r_list[i], '.3f'), \",truncated mean mse=\", format(mse_mean[i], '.3f'), \",truncated var mse=\", format(mse_var[i], '.3f'))\n",
    "        np.save(out_dir+\"/r_mse_list.npy\", mse_list)\n",
    "        # Despite the file name, this file stores r_list.\n",
    "        np.save(out_dir+\"/r_n_list.npy\", r_list)\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
