{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Python packages used in this code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "from random import shuffle\n",
    "import os\n",
    "import pickle\n",
    "import time\n",
    "import sklearn\n",
    "import platform\n",
    "import sys\n",
    "from sklearn.kernel_ridge import KernelRidge\n",
    "from sklearn.gaussian_process.kernels import Matern, RBF\n",
    "from sklearn.base import BaseEstimator, RegressorMixin\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV\n",
    "# from sklearn.linear_model import Ridge, LinearRegression\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
    "import warnings\n",
    "warnings.simplefilter('ignore')\n",
    "from IPython.display import clear_output\n",
    "from scipy import io\n",
    "import math\n",
    "import joblib\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Environments\n",
    "\n",
    "--Platform--\n",
    "OS : macOS-10.16-x86_64-i386-64bit\n",
    "--Version--\n",
    "python :  3.9.12 (main, Jun  1 2022, 06:36:29) \n",
    "[Clang 12.0.0 ]\n",
    "numpy : 1.23.1\n",
    "pandas : 1.4.3\n",
    "sklearn : 1.1.1\n",
    "\"\"\"\n",
    "\n",
    "print('--Platform--')\n",
    "print('OS :', platform.platform())\n",
    "print('--Version--')\n",
    "print('python : ', sys.version)\n",
    "print('numpy :', np.__version__)\n",
    "print('pandas :', pd.__version__)\n",
    "print('sklearn :', sklearn.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the model class proposed in the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class KernelRidge_HM(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, gamma=1, lambda1=1, nu=1.5, kernel='linear'):\n",
    "        \"\"\"\n",
    "        Define the kernel ridge model with intercept.\n",
    "            h(x) = <\\theta, x> + intercept\n",
    "                x  : discriptor\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            gamma   : length scale of the kernel\n",
    "            lambda1 : regularization parameter for theta\n",
    "            nu      : parameter controlling the smoothness of Matern kernel\n",
    "            kernel  : kernel used for the model ('rbf' or 'matern')\n",
    "            \n",
    "        \"\"\"\n",
    "        self.gamma = gamma\n",
    "        self.lambda1 = lambda1\n",
    "        self.nu = nu\n",
    "        self.kernel = kernel\n",
    "        \n",
    "    def make_gram(self, X, Y, gamma, nu, kernel):\n",
    "        \"\"\"\n",
    "        Making the Gram matrix\n",
    "            In sklearn, RBF kernel is defined as exp(-|x-x'|^2/(2l^2)).\n",
    "            To be consistent with the kernel used in the proposed method, in which RBF kernel is defined as exp(-\\gamma |x-x'|^2), we use 'length_sacle=np.sqrt(1/(2*gamma))'.\n",
    "        \"\"\"\n",
    "        if kernel=='rbf':\n",
    "            K = RBF(length_scale=np.sqrt(1/(2*gamma)))(X,Y)\n",
    "        elif kernel=='matern':\n",
    "            K = Matern(length_scale=np.sqrt(1/(2*gamma)), nu=nu)(X,Y)\n",
    "        return pd.DataFrame(K)\n",
    "    \n",
    "    def low_rank_inv(self, A, tol):\n",
    "        \"\"\"\n",
    "        Computing the low-rank approximation of a inverse matrix with SVD.\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            tol : Threshold below which SVD values are considered zero.\n",
    "        \"\"\"\n",
    "        u, s, vh = np.linalg.svd(A)\n",
    "        r = np.linalg.matrix_rank(np.diag(s), tol=tol)\n",
    "        ur = u[:, :r]\n",
    "        sr = s[:r]\n",
    "        vhr = vh[:r, :]\n",
    "        Ar = np.matmul(ur, np.multiply(sr[..., np.newaxis], vhr))\n",
    "        Ard = np.matmul(vhr.T, np.multiply(1/sr[..., np.newaxis], ur.T))\n",
    "        return Ard\n",
    "    \n",
    "    # def fit(self, X, y=None):\n",
    "    #     tol = 1e-5\n",
    "    #     self.X = X\n",
    "    #     self.y = y\n",
    "    #     self.n_sample, self.dim_train = self.X.shape   \n",
    "    #     self.gram = self.make_gram(X, X, gamma=self.gamma, nu=self.nu, kernel=self.kernel)\n",
    "    #     self.theta = self.low_rank_inv(self.gram + self.lambda1*np.diag(np.ones(self.n_sample)), tol=tol).dot(self.y)\n",
    "    #     self.intercept = 0\n",
    "    #     return self\n",
    "    \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "            None\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            X             : descriptors\n",
    "            y             : output\n",
    "            n_sample      : number of sumples\n",
    "            dim_x         : dimension of the features\n",
    "            gram          : Gram matrix using X\n",
    "            \n",
    "            theta         : Estimated parameter\n",
    "            intercept     : Estimated intercept\n",
    "        \"\"\"\n",
    "        # Setting\n",
    "        tol = 1e-4\n",
    "        self.X = X\n",
    "        self.y = y\n",
    "        self.n_sample, self.dim_x = self.X.shape   \n",
    "        self.gram = self.make_gram(X, X, gamma=self.gamma, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        # Parameter estimation\n",
    "        tmp_mat = np.block([\n",
    "            [self.gram.dot(self.gram)+self.lambda1*self.gram                   , pd.DataFrame(self.gram.dot(np.ones(self.n_sample)))],\n",
    "            [pd.DataFrame(np.ones(self.n_sample).reshape(1,-1).dot(self.gram)), pd.DataFrame(np.ones(self.n_sample).reshape(1,-1).dot(np.ones(self.n_sample)))]\n",
    "        ])\n",
    "        tmp_gram = np.block([\n",
    "            [self.gram],\n",
    "            [pd.DataFrame(np.ones(self.n_sample).reshape(1,-1))]\n",
    "        ])\n",
    "        tmp_theta = self.low_rank_inv(tmp_mat, tol=tol).dot(tmp_gram).dot(self.y)\n",
    "        self.theta = tmp_theta[:self.n_sample]\n",
    "        self.intercept = tmp_theta[self.n_sample]\n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            h(x) = Gram \\alpha + intercept\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "            y_pred : Gram \\alpha + intercept\n",
    "        \"\"\"\n",
    "        pred_gram = self.make_gram(X, self.X, gamma=self.gamma, nu=self.nu, kernel=self.kernel)\n",
    "        y_pred = pred_gram.dot(self.theta) + self.intercept\n",
    "        y_pred.index = X.index\n",
    "        return y_pred\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            {'gamma', 'lambda1', 'nu', 'kernel'}\n",
    "        \"\"\"\n",
    "        return {'gamma' : self.gamma,\n",
    "                'lambda1' : self.lambda1,\n",
    "                'nu' : self.nu,\n",
    "                'kernel' : self.kernel}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class AffineTrans(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, gamma1=1, gamma2=1, gamma3=1, lambda1=1, lambda2=1, lambda3=1, nu=1.5, kernel='linear'):\n",
    "        \"\"\"\n",
    "        Define the model class proposed in the paper\n",
    "            h(x) = intercept_a + <\\alpha, \\Phi_1> + (<\\beta, \\Phi_2> + 1)<\\gamma, \\Phi_3>\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            gamma1  : length scale of the kernel1\n",
    "            gamma2  : length scale of the kernel2\n",
    "            gamma3  : length scale of the kernel3\n",
    "            lambda1 : regularization parameter for alpha\n",
    "            lambda2 : regularization parameter for beta\n",
    "            lambda3 : reguralization parameter for gamma\n",
    "            nu      : parameter controlling the smoothness of Matern kernel\n",
    "            kernel  : kernel used for the model ('rbf' or 'matern')\n",
    "        \"\"\"\n",
    "        self.gamma1 = gamma1\n",
    "        self.gamma2 = gamma2\n",
    "        self.gamma3 = gamma3\n",
    "        self.lambda1 = lambda1\n",
    "        self.lambda2 = lambda2\n",
    "        self.lambda3 = lambda3\n",
    "        self.nu = nu\n",
    "        self.kernel = kernel\n",
    "        \n",
    "    def make_gram(self, X, Y, gamma, nu, kernel):\n",
    "        \"\"\"\n",
    "        Making the Gram matrix\n",
    "            In sklearn, RBF kernel is defined as exp(-|x-x'|^2/(2l^2)).\n",
    "            To be consistent with the kernel used in the proposed method, in which RBF kernel is defined as exp(-\\gamma |x-x'|^2), we use 'length_sacle=np.sqrt(1/(2*gamma))'.\n",
    "        \"\"\"\n",
    "        if kernel=='rbf':\n",
    "            K = RBF(length_scale=np.sqrt(1/(2*gamma)))(X,Y)\n",
    "        elif kernel=='matern':\n",
    "            K = Matern(length_scale=np.sqrt(1/(2*gamma)), nu=nu)(X,Y)\n",
    "        return pd.DataFrame(K)\n",
    "    \n",
    "    def low_rank_inv(self, A, tol):\n",
    "        \"\"\"\n",
    "        Computing the low-rank approximation of a inverse matrix with SVD.\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            tol : Threshold below which SVD values are considered zero.\n",
    "        \"\"\"\n",
    "        u, s, vh = np.linalg.svd(A)\n",
    "        r = np.linalg.matrix_rank(np.diag(s), tol=tol)\n",
    "        ur = u[:, :r]\n",
    "        sr = s[:r]\n",
    "        vhr = vh[:r, :]\n",
    "        Ar = np.matmul(ur, np.multiply(sr[..., np.newaxis], vhr))\n",
    "        Ard = np.matmul(vhr.T, np.multiply(1/sr[..., np.newaxis], ur.T))\n",
    "        return Ard\n",
    "\n",
    "    def make_diff(self, w_new, w_old):\n",
    "        \"\"\"\n",
    "        Function to calculate parameter changes for algorithm convergence determination\n",
    "            We use \\max{|w_new - w_old|}/\\max{|w_old|} for determining the convergence.\n",
    "            This criterion is used in some algorithms in scikit-learn, for example, see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html\n",
    "            We apply this criterion to each of \\alpha, \\beta and \\gamma separately and use their maximum value for the convergence decision.\n",
    "        \"\"\"\n",
    "        diff1 = np.max(np.abs(w_new-w_old))\n",
    "        diff2 = np.max(np.abs(w_old))\n",
    "        if diff2 < 1e-10:\n",
    "            out = 0\n",
    "        else:\n",
    "            out = diff1/diff2\n",
    "        return out\n",
    "\n",
    "    def estimation_from_b(self):\n",
    "        \"\"\"\n",
    "        Block relaxation algorthm\n",
    "            See paper for the updated formula\n",
    "        \"\"\"\n",
    "        # Repeat until convergence\n",
    "        self.convergence = False\n",
    "        for i_loop in range(self.n_loop):\n",
    "            # Setting\n",
    "            self.i_count += 1\n",
    "            lambda_n = 1\n",
    "            tol = 1e-5\n",
    "            \n",
    "            K1a1 = self.gram1.dot(self.a) + self.intercept_a\n",
    "            K3c = self.gram3.dot(self.c)\n",
    "            \n",
    "            # Updating b\n",
    "            ## Rounding of numbers for stability of estimation\n",
    "            if np.linalg.norm(self.c) < 1e-2:\n",
    "                self.b = np.zeros(self.n_sample)\n",
    "                self.intercept_b = 0\n",
    "            else:\n",
    "                tmp_b = np.diag(K3c).dot(np.diag(K3c)).dot(self.gram2) + lambda_n*self.lambda2*np.diag(np.ones(self.n_sample))\n",
    "                self.b = self.low_rank_inv(tmp_b, tol=tol).dot(K3c*(self.y.values - K1a1 - self.intercept_b*K3c))\n",
    "                self.intercept_b = 1\n",
    "            ## Rounding of numbers for stability of estimation\n",
    "            if np.linalg.norm(self.b) < 1e-2:\n",
    "                self.b = np.zeros(self.n_sample)\n",
    "            K2b1 = self.gram2.dot(self.b) + self.intercept_b\n",
    "            \n",
    "            # Updating c\n",
    "            tmp_c = np.diag(K2b1).dot(np.diag(K2b1)).dot(self.gram3) + lambda_n*self.lambda3*np.diag(np.ones(self.n_sample))\n",
    "            self.c = self.low_rank_inv(tmp_c, tol=tol).dot(K2b1*(self.y.values - K1a1))\n",
    "            ## Rounding of numbers for stability of estimation\n",
    "            if np.linalg.norm(self.c) < 1e-2:\n",
    "                self.c = np.zeros(self.n_sample)\n",
    "            K3c = self.gram3.dot(self.c)\n",
    "            \n",
    "            # Updating a\n",
    "            self.a = self.Minv.dot(self.y.values - self.intercept_a - K2b1*K3c)\n",
    "            self.intercept_a = np.sum(self.y.values - self.gram1.dot(self.a) - K2b1*K3c)/(self.n_sample + self.lambda_a)\n",
    "            \n",
    "            # Store the parameters\n",
    "            self.result_b[self.i_count] = self.b\n",
    "            self.result_c[self.i_count] = self.c\n",
    "            self.result_a[self.i_count] = self.a\n",
    "            \n",
    "            # Compute the difference\n",
    "            diff_a = self.make_diff(w_new=self.result_a[self.i_count], w_old=self.result_a[self.i_count-1])\n",
    "            diff_b = self.make_diff(w_new=self.result_b[self.i_count], w_old=self.result_b[self.i_count-1])\n",
    "            diff_c = self.make_diff(w_new=self.result_c[self.i_count], w_old=self.result_c[self.i_count-1])\n",
    "            diff = np.max([diff_a, diff_b, diff_c])\n",
    "            self.diff_a[self.i_count] = diff_a\n",
    "            self.diff_b[self.i_count] = diff_b\n",
    "            self.diff_c[self.i_count] = diff_c\n",
    "            self.diff[self.i_count] = diff\n",
    "\n",
    "            # Check the convergence\n",
    "            if diff < 1e-4:\n",
    "                self.convergence = True\n",
    "                break\n",
    "        return self\n",
    "    \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "            dim_x         : dimension of the discriptor\n",
    "            ini_alpha     : initial value for \\alpha_1\n",
    "            ini_intercept : initial value for \\alpha_0\n",
    "            ini_beta      : initial value for \\beta\n",
    "            ini_gamma     : initial value for \\gamma\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            X             : discriptors + source features\n",
    "            X_train       : descriptors\n",
    "            X_source      : source features\n",
    "            n_sample      : number of sumples\n",
    "            dim_x         : dimension of the discriptor\n",
    "            \n",
    "            gram1         : Gram matrix using X_source\n",
    "            gram2         : Gram matrix using X_source\n",
    "            gram3         : Gram matrix using X_train\n",
    "            \n",
    "            n_loop        : maximum number of iterations\n",
    "\n",
    "            result_a      : dataframe to store a in all iterations\n",
    "            result_b      : dataframe to store b in all iterations\n",
    "            result_c      : dataframe to store c in all iterations\n",
    "            diff_a        : series to store the difference between \\alpha_1_new and \\alpha_1_old \n",
    "            diff_b        : series to store the difference between \\beta_new and \\beta_old\n",
    "            diff_c        : series to store the difference between \\gamma_new and \\gamma_old \n",
    "            diff          : series to store the difference between AllParams_new and AllParams_old \n",
    "            \n",
    "            Minv          : (K1 + \\Lambda_1 I)^{-1}\n",
    "\n",
    "        \"\"\"\n",
    "        # Setting\n",
    "        self.X_train = X.iloc[:,:dim_x]\n",
    "        self.X_source = X.iloc[:,dim_x:]\n",
    "        self.n_sample, self.dim_x = self.X_train.shape\n",
    "        self.X = X\n",
    "        self.y = y\n",
    "        self.n_loop = 1000\n",
    "        self.result_a = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.result_b = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.result_c = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.diff_a = np.zeros(self.n_loop+1)\n",
    "        self.diff_b = np.zeros(self.n_loop+1)\n",
    "        self.diff_c = np.zeros(self.n_loop+1)\n",
    "        self.diff = np.zeros(self.n_loop+1)\n",
    "               \n",
    "        # Compute Gram matrices\n",
    "        self.gram1 = self.make_gram(self.X_source, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        self.gram2 = self.make_gram(self.X_source, self.X_source, gamma=self.gamma2, nu=self.nu, kernel=self.kernel)\n",
    "        self.gram3 = self.make_gram(self.X_train, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        # Initialization\n",
    "        self.Minv = np.linalg.pinv(self.gram1 + self.lambda1*np.diag(np.ones(self.n_sample)), hermitian=True)\n",
    "        fix_seed(0)\n",
    "        self.b = np.random.randn(self.n_sample)\n",
    "        self.c = np.random.randn(self.n_sample)\n",
    "        self.a = self.Minv.dot(self.y.values)\n",
    "        self.i_count = 0\n",
    "        self.lambda_a = 0\n",
    "        self.lambda_b = 0\n",
    "        self.intercept_a = np.sum(self.y.values - self.gram1.dot(self.a))/(self.n_sample + self.lambda_a)\n",
    "        self.intercept_b = 0.5\n",
    "        \n",
    "        # Store\n",
    "        self.result_a[self.i_count] = self.a.reshape([-1])\n",
    "        self.result_b[self.i_count] = self.b.reshape([-1])\n",
    "        self.result_c[self.i_count] = self.c.reshape([-1])\n",
    "        self.diff_a[self.i_count] = np.nan\n",
    "        self.diff_b[self.i_count] = np.nan\n",
    "        self.diff_c[self.i_count] = np.nan\n",
    "        self.diff[self.i_count] = np.nan\n",
    "        \n",
    "        # Estimation\n",
    "        self.estimation_from_b()\n",
    "        \n",
    "        # Dataframe shaping\n",
    "        self.result_a = self.result_a[:(self.i_count+1),:]\n",
    "        self.result_b = self.result_b[:(self.i_count+1),:]\n",
    "        self.result_c = self.result_c[:(self.i_count+1),:]\n",
    "        self.diff_a = self.diff_a[:(self.i_count+1)]\n",
    "        self.diff_b = self.diff_b[:(self.i_count+1)]\n",
    "        self.diff_c = self.diff_c[:(self.i_count+1)]\n",
    "        self.diff = self.diff[:(self.i_count+1)]\n",
    "        \n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            h(x) = <\\alpha, \\Phi_1> + (<\\beta, \\phi_2> + 1) * (<\\gamma, \\Phi_3>) + intercept\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "            y_pred : Gram1 a + (Gram2 * b + 1) * (Gram3 * b) + intercept\n",
    "        \"\"\"\n",
    "        X_source_pred = X.iloc[:,dim_x:]\n",
    "        X_train_pred = X.iloc[:,:dim_x]\n",
    "        \n",
    "        pred_gram1 = self.make_gram(X_source_pred, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        pred_gram2 = self.make_gram(X_source_pred, self.X_source, gamma=self.gamma2, nu=self.nu, kernel=self.kernel)\n",
    "        pred_gram3 = self.make_gram(X_train_pred, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        self.pred1 = pred_gram1.dot(self.a)\n",
    "        self.pred2 = pred_gram2.dot(self.b)\n",
    "        self.pred3 = pred_gram3.dot(self.c)\n",
    "        y_pred = pred_gram1.dot(self.a) + self.intercept_a + (pred_gram2.dot(self.b)+self.intercept_b)*(pred_gram3.dot(self.c))\n",
    "        y_pred.index = X.index\n",
    "        \n",
    "        return y_pred\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            {'gamma1', 'gamma2', 'gamma3', 'lambda1', 'lambda2', 'lambda3', 'nu', 'kernel'}\n",
    "        \"\"\"\n",
    "        return {'gamma1' : self.gamma1,\n",
    "                'gamma2' : self.gamma2,\n",
    "                'gamma3' : self.gamma3,\n",
    "                'lambda1' : self.lambda1,\n",
    "                'lambda2' : self.lambda2,\n",
    "                'lambda3' : self.lambda3,\n",
    "                'nu' : self.nu,\n",
    "                'kernel' : self.kernel}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AffineTrans2(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, gamma1=1, gamma2=1, gamma3=1, lambda1=1, lambda2=1, lambda3=1, nu=1.5, kernel='linear'):\n",
    "        \"\"\"\n",
    "        Define the model class proposed in the paper\n",
    "            h(x) = intercept_a + (<\\beta, \\Phi_2> + 1)<\\gamma, \\Phi_3>\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            gamma1  : length scale of the kernel1 (not used)\n",
    "            gamma2  : length scale of the kernel2\n",
    "            gamma3  : length scale of the kernel3\n",
    "            lambda1 : regularization parameter for alpha (not used)\n",
    "            lambda2 : regularization parameter for beta\n",
    "            lambda3 : reguralization parameter for gamma\n",
    "            nu      : parameter controlling the smoothness of Matern kernel\n",
    "            kernel  : kernel used for the model ('rbf' or 'matern')\n",
    "        \"\"\"\n",
    "        self.gamma1 = gamma1\n",
    "        self.gamma2 = gamma2\n",
    "        self.gamma3 = gamma3\n",
    "        self.lambda1 = lambda1\n",
    "        self.lambda2 = lambda2\n",
    "        self.lambda3 = lambda3\n",
    "        self.nu = nu\n",
    "        self.kernel = kernel\n",
    "        \n",
    "    def make_gram(self, X, Y, gamma, nu, kernel):\n",
    "        \"\"\"\n",
    "        Making the Gram matrix\n",
    "            In sklearn, RBF kernel is defined as exp(-|x-x'|^2/(2l^2)).\n",
    "            To be consistent with the kernel used in the proposed method, in which RBF kernel is defined as exp(-\\gamma |x-x'|^2), we use 'length_sacle=np.sqrt(1/(2*gamma))'.\n",
    "        \"\"\"\n",
    "        if kernel=='rbf':\n",
    "            K = RBF(length_scale=np.sqrt(1/(2*gamma)))(X,Y)\n",
    "        elif kernel=='matern':\n",
    "            K = Matern(length_scale=np.sqrt(1/(2*gamma)), nu=nu)(X,Y)\n",
    "        return pd.DataFrame(K)\n",
    "    \n",
    "    def low_rank_inv(self, A, tol):\n",
    "        \"\"\"\n",
    "        Computing the low-rank approximation of a inverse matrix with SVD.\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            tol : Threshold below which SVD values are considered zero.\n",
    "        \"\"\"\n",
    "        u, s, vh = np.linalg.svd(A)\n",
    "        r = np.linalg.matrix_rank(np.diag(s), tol=tol)\n",
    "        ur = u[:, :r]\n",
    "        sr = s[:r]\n",
    "        vhr = vh[:r, :]\n",
    "        Ar = np.matmul(ur, np.multiply(sr[..., np.newaxis], vhr))\n",
    "        Ard = np.matmul(vhr.T, np.multiply(1/sr[..., np.newaxis], ur.T))\n",
    "        return Ard\n",
    "\n",
    "    def make_diff(self, w_new, w_old):\n",
    "        \"\"\"\n",
    "        Function to calculate parameter changes for algorithm convergence determination\n",
    "            We use \\max{|w_new - w_old|}/\\max{|w_old|} for determining the convergence.\n",
    "            This criterion is used in some algorithms in scikit-learn, for example, see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html\n",
    "            We apply this criterion to each of \\alpha, \\beta and \\gamma separately and use their maximum value for the convergence decision.\n",
    "        \"\"\"\n",
    "        diff1 = np.max(np.abs(w_new-w_old))\n",
    "        diff2 = np.max(np.abs(w_old))\n",
    "        if diff2 < 1e-10:\n",
    "            out = 0\n",
    "        else:\n",
    "            out = diff1/diff2\n",
    "        return out\n",
    "\n",
    "    def estimation_from_b(self):\n",
    "        \"\"\"\n",
    "        Block relaxation algorthm\n",
    "            See paper for the updated formula\n",
    "        \"\"\"\n",
    "        # Repeat until convergence\n",
    "        self.convergence = False\n",
    "        for i_loop in range(self.n_loop):\n",
    "            # Setting\n",
    "            self.i_count += 1\n",
    "            lambda_n = 1\n",
    "            tol = 1e-5\n",
    "            \n",
    "            K1a1 = self.gram1.dot(self.a) + self.intercept_a\n",
    "            K3c = self.gram3.dot(self.c)\n",
    "            \n",
    "            # Updating b\n",
    "            ## Rounding of numbers for stability of estimation\n",
    "            if np.linalg.norm(self.c) < 1e-2:\n",
    "                self.b = np.zeros(self.n_sample)\n",
    "                self.intercept_b = 0\n",
    "            else:\n",
    "                tmp_b = np.diag(K3c).dot(np.diag(K3c)).dot(self.gram2) + lambda_n*self.lambda2*np.diag(np.ones(self.n_sample))\n",
    "                self.b = self.low_rank_inv(tmp_b, tol=tol).dot(K3c*(self.y.values - K1a1 - self.intercept_b*K3c))\n",
    "                self.intercept_b = 1\n",
    "            K2b1 = self.gram2.dot(self.b) + self.intercept_b\n",
    "            \n",
    "            # Updating c\n",
    "            tmp_c = np.diag(K2b1).dot(np.diag(K2b1)).dot(self.gram3) + lambda_n*self.lambda3*np.diag(np.ones(self.n_sample))\n",
    "            self.c = self.low_rank_inv(tmp_c, tol=tol).dot(K2b1*(self.y.values - K1a1))\n",
    "            ## Rounding of numbers for stability of estimation\n",
    "            if np.linalg.norm(self.c) < 1e-2:\n",
    "                self.c = np.zeros(self.n_sample)\n",
    "            K3c = self.gram3.dot(self.c)\n",
    "            \n",
    "            # Set zero to a and estimate the intercept\n",
    "            self.a = np.zeros(self.n_sample)\n",
    "            self.intercept_a = np.sum(self.y.values - self.gram1.dot(self.a) - K2b1*K3c)/(self.n_sample + self.lambda_a)\n",
    "            \n",
    "            # Store the parameters\n",
    "            self.result_b[self.i_count] = self.b\n",
    "            self.result_c[self.i_count] = self.c\n",
    "            self.result_a[self.i_count] = self.a\n",
    "\n",
    "            # Compute the difference\n",
    "            diff_a = self.make_diff(w_new=self.result_a[self.i_count], w_old=self.result_a[self.i_count-1])\n",
    "            diff_b = self.make_diff(w_new=self.result_b[self.i_count], w_old=self.result_b[self.i_count-1])\n",
    "            diff_c = self.make_diff(w_new=self.result_c[self.i_count], w_old=self.result_c[self.i_count-1])\n",
    "            diff = np.max([diff_a, diff_b, diff_c])\n",
    "            self.diff_a[self.i_count] = diff_a\n",
    "            self.diff_b[self.i_count] = diff_b\n",
    "            self.diff_c[self.i_count] = diff_c\n",
    "            self.diff[self.i_count] = diff\n",
    "\n",
    "            # Check the convergence\n",
    "            if diff < 1e-4:\n",
    "                self.convergence = True\n",
    "                break\n",
    "        return self\n",
    "    \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "            dim_x         : dimension of the discriptor\n",
    "            ini_alpha     : initial value for \\alpha_1\n",
    "            ini_intercept : initial value for \\alpha_0\n",
    "            ini_beta      : initial value for \\beta\n",
    "            ini_gamma     : initial value for \\gamma\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            X             : discriptors + source features\n",
    "            X_train       : descriptors\n",
    "            X_source      : source features\n",
    "            n_sample      : number of sumples\n",
    "            dim_x         : dimension of the discriptor\n",
    "            \n",
    "            gram1         : Gram matrix using X_source\n",
    "            gram2         : Gram matrix using X_source\n",
    "            gram3         : Gram matrix using X_train\n",
    "            \n",
    "            n_loop        : maximum number of iterations\n",
    "\n",
    "            result_a      : dataframe to store a in all iterations\n",
    "            result_b      : dataframe to store b in all iterations\n",
    "            result_c      : dataframe to store c in all iterations\n",
    "            diff_a        : series to store the difference between \\alpha_1_new and \\alpha_1_old \n",
    "            diff_b        : series to store the difference between \\beta_new and \\beta_old\n",
    "            diff_c        : series to store the difference between \\gamma_new and \\gamma_old \n",
    "            diff          : series to store the difference between AllParams_new and AllParams_old \n",
    "        \"\"\"\n",
    "        # Setting\n",
    "        self.X_train = X.iloc[:,:dim_x]\n",
    "        self.X_source = X.iloc[:,dim_x:]\n",
    "        self.n_sample, self.dim_x = self.X_train.shape\n",
    "        self.X = X\n",
    "        self.y = y\n",
    "        self.n_loop = 1000\n",
    "        self.result_a = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.result_b = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.result_c = np.zeros([self.n_loop+1, self.n_sample])\n",
    "        self.diff_a = np.zeros(self.n_loop+1)\n",
    "        self.diff_b = np.zeros(self.n_loop+1)\n",
    "        self.diff_c = np.zeros(self.n_loop+1)\n",
    "        self.diff = np.zeros(self.n_loop+1)\n",
    "               \n",
    "        # Compute Gram matrices\n",
    "        self.gram1 = self.make_gram(self.X_source, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        self.gram2 = self.make_gram(self.X_source, self.X_source, gamma=self.gamma2, nu=self.nu, kernel=self.kernel)\n",
    "        self.gram3 = self.make_gram(self.X_train, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        # Initialization\n",
    "        fix_seed(0)\n",
    "        self.b = np.random.randn(self.n_sample)\n",
    "        self.c = np.random.randn(self.n_sample)\n",
    "        self.a = np.zeros(self.n_sample)\n",
    "        self.i_count = 0\n",
    "        self.lambda_a = 0\n",
    "        self.lambda_b = 0\n",
    "        self.intercept_a = np.sum(self.y.values - self.gram1.dot(self.a))/(self.n_sample + self.lambda_a)\n",
    "        self.intercept_b = 0.5\n",
    "        \n",
    "        # Store\n",
    "        self.result_a[self.i_count] = self.a.reshape([-1])\n",
    "        self.result_b[self.i_count] = self.b.reshape([-1])\n",
    "        self.result_c[self.i_count] = self.c.reshape([-1])\n",
    "        self.diff_a[self.i_count] = np.nan\n",
    "        self.diff_b[self.i_count] = np.nan\n",
    "        self.diff_c[self.i_count] = np.nan\n",
    "        self.diff[self.i_count] = np.nan\n",
    "        \n",
    "        # Estimation\n",
    "        self.estimation_from_b()\n",
    "        \n",
    "        # Dataframe shaping\n",
    "        self.result_a = self.result_a[:(self.i_count+1),:]\n",
    "        self.result_b = self.result_b[:(self.i_count+1),:]\n",
    "        self.result_c = self.result_c[:(self.i_count+1),:]\n",
    "        self.diff_a = self.diff_a[:(self.i_count+1)]\n",
    "        self.diff_b = self.diff_b[:(self.i_count+1)]\n",
    "        self.diff_c = self.diff_c[:(self.i_count+1)]\n",
    "        self.diff = self.diff[:(self.i_count+1)]\n",
    "        \n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            h(x) = (<\\beta, \\phi_2> + 1) * (<\\gamma, \\Phi_3>) + intercept\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "            y_pred : (Gram2 * b + 1) * (Gram3 * b) + intercept\n",
    "        \"\"\"\n",
    "        X_source_pred = X.iloc[:,dim_x:]\n",
    "        X_train_pred = X.iloc[:,:dim_x]\n",
    "        \n",
    "        pred_gram1 = self.make_gram(X_source_pred, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        pred_gram2 = self.make_gram(X_source_pred, self.X_source, gamma=self.gamma2, nu=self.nu, kernel=self.kernel)\n",
    "        pred_gram3 = self.make_gram(X_train_pred, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        self.pred1 = pred_gram1.dot(self.a)\n",
    "        self.pred2 = pred_gram2.dot(self.b)\n",
    "        self.pred3 = pred_gram3.dot(self.c)\n",
    "        y_pred = pred_gram1.dot(self.a) + self.intercept_a + (pred_gram2.dot(self.b)+self.intercept_b)*(pred_gram3.dot(self.c))\n",
    "        y_pred.index = X.index\n",
    "        \n",
    "        return y_pred\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            {'gamma1', 'gamma2', 'gamma3', 'lambda1', 'lambda2', 'lambda3', 'nu', 'kernel'}\n",
    "        \"\"\"\n",
    "        return {'gamma1' : self.gamma1,\n",
    "                'gamma2' : self.gamma2,\n",
    "                'gamma3' : self.gamma3,\n",
    "                'lambda1' : self.lambda1,\n",
    "                'lambda2' : self.lambda2,\n",
    "                'lambda3' : self.lambda3,\n",
    "                'nu' : self.nu,\n",
    "                'kernel' : self.kernel}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AffineTrans3(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, gamma1=1, gamma3=1, lambda1=1, lambda3=1, nu=1.5, kernel='linear'):\n",
    "        \"\"\"\n",
    "        Define the model class proposed in the paper\n",
    "            h(x) = intercept_a + <\\alpha, \\Phi_1> + <\\gamma, \\Phi_3>\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            gamma1  : length scale of the kernel1\n",
    "            gamma3  : length scale of the kernel3\n",
    "            lambda1 : regularization parameter for alpha\n",
    "            lambda3 : reguralization parameter for gamma\n",
    "            nu      : parameter controlling the smoothness of Matern kernel\n",
    "            kernel  : kernel used for the model ('rbf' or 'matern')\n",
    "        \"\"\"\n",
    "        self.gamma1 = gamma1\n",
    "        self.gamma3 = gamma3\n",
    "        self.lambda1 = lambda1\n",
    "        self.lambda3 = lambda3\n",
    "        self.nu = nu\n",
    "        self.kernel = kernel\n",
    "        \n",
    "    def make_gram(self, X, Y, gamma, nu, kernel):\n",
    "        \"\"\"\n",
    "        Making the Gram matrix\n",
    "            In sklearn, RBF kernel is defined as exp(-|x-x'|^2/(2l^2)).\n",
    "            To be consistent with the kernel used in the proposed method, in which RBF kernel is defined as exp(-\\gamma |x-x'|^2), we use 'length_sacle=np.sqrt(1/(2*gamma))'.\n",
    "        \"\"\"\n",
    "        if kernel=='rbf':\n",
    "            K = RBF(length_scale=np.sqrt(1/(2*gamma)))(X,Y)\n",
    "        elif kernel=='matern':\n",
    "            K = Matern(length_scale=np.sqrt(1/(2*gamma)), nu=nu)(X,Y)\n",
    "        return pd.DataFrame(K)\n",
    "    \n",
    "    def low_rank_inv(self, A, tol):\n",
    "        \"\"\"\n",
    "        Computing the low-rank approximation of a inverse matrix with SVD.\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            tol : Threshold below which SVD values are considered zero.\n",
    "        \"\"\"\n",
    "        u, s, vh = np.linalg.svd(A)\n",
    "        r = np.linalg.matrix_rank(np.diag(s), tol=tol)\n",
    "        ur = u[:, :r]\n",
    "        sr = s[:r]\n",
    "        vhr = vh[:r, :]\n",
    "        Ar = np.matmul(ur, np.multiply(sr[..., np.newaxis], vhr))\n",
    "        Ard = np.matmul(vhr.T, np.multiply(1/sr[..., np.newaxis], ur.T))\n",
    "        return Ard\n",
    "    \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "            Note that unlike proposed method 1 and proposed method 2, the optimal parameters can be obtained analytically.\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "            dim_x         : dimension of the discriptor\n",
    "            ini_alpha     : initial value for \\alpha_1\n",
    "            ini_intercept : initial value for \\alpha_0\n",
    "            ini_beta      : initial value for \\beta\n",
    "            ini_gamma     : initial value for \\gamma\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            X             : discriptors + source features\n",
    "            X_train       : descriptors\n",
    "            X_source      : source features\n",
    "            n_sample      : number of sumples\n",
    "            dim_x         : dimension of the discriptor\n",
    "            \n",
    "            gram1         : Gram matrix using X_source\n",
    "            gram3         : Gram matrix using X_train\n",
    "        \"\"\"\n",
    "        # Setting\n",
    "        self.X_train = X.iloc[:,:dim_x]\n",
    "        self.X_source = X.iloc[:,dim_x:]\n",
    "        tol = 1e-5\n",
    "        self.n_sample, self.dim_x = self.X_train.shape\n",
    "        self.X = X\n",
    "        self.y = y\n",
    "               \n",
    "        # Compute Gram matrices\n",
    "        self.gram1 = self.make_gram(self.X_source, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        self.gram3 = self.make_gram(self.X_train, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        # Estimate the optimal parameters\n",
    "        tmp_mat = np.block([\n",
    "            [self.gram1.dot(self.gram1) + self.lambda1 * self.gram1, self.gram1.dot(self.gram3)                          , pd.DataFrame(self.gram1.dot(np.ones(self.n_sample)))],\n",
    "            [self.gram3.dot(self.gram1),                             self.gram3.dot(self.gram3) + self.lambda3*self.gram3, pd.DataFrame(self.gram3.dot(np.ones(self.n_sample)))],\n",
    "            [pd.DataFrame(np.ones(self.n_sample).reshape(1,-1).dot(self.gram1)),\n",
    "                 pd.DataFrame(np.ones(self.n_sample).reshape(1,-1).dot(self.gram3)), \n",
    "                 pd.DataFrame(np.ones(self.n_sample).reshape(1,-1).dot(np.ones(self.n_sample)))]\n",
    "        ])\n",
    "        tmp_gram = np.block([\n",
    "            [self.gram1],\n",
    "            [self.gram3],\n",
    "            [pd.DataFrame(np.ones(self.n_sample).reshape(1,-1))]\n",
    "        ])\n",
    "        tmp_theta = self.low_rank_inv(tmp_mat, tol=tol).dot(tmp_gram).dot(self.y)\n",
    "        \n",
    "        # Split the parameters\n",
    "        self.a = tmp_theta[:self.n_sample]\n",
    "        self.c = tmp_theta[self.n_sample:(2*self.n_sample)]\n",
    "        self.intercept = tmp_theta[(2*self.n_sample):][0]\n",
    "        \n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            h(x) = <\\alpha, \\Phi_1> + (<\\gamma, \\Phi_3>) + intercept\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "            y_pred : Gram1 a + (Gram3 * b) + intercept\n",
    "        \"\"\"\n",
    "        X_source_pred = X.iloc[:,dim_x:]\n",
    "        X_train_pred = X.iloc[:,:dim_x]\n",
    "        \n",
    "        pred_gram1 = self.make_gram(X_source_pred, self.X_source, gamma=self.gamma1, nu=self.nu, kernel=self.kernel)\n",
    "        pred_gram3 = self.make_gram(X_train_pred, self.X_train, gamma=self.gamma3, nu=self.nu, kernel=self.kernel)\n",
    "        \n",
    "        self.pred1 = pred_gram1.dot(self.a)\n",
    "        self.pred3 = pred_gram3.dot(self.c)\n",
    "        y_pred = pred_gram1.dot(self.a) + (pred_gram3.dot(self.c)) + self.intercept\n",
    "        y_pred.index = X.index\n",
    "        \n",
    "        return y_pred\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            {'gamma1', 'gamma3', 'lambda1', 'lambda3', 'nu', 'kernel'}\n",
    "        \"\"\"\n",
    "        return {'gamma1' : self.gamma1,\n",
    "                'gamma3' : self.gamma3,\n",
    "                'lambda1' : self.lambda1,\n",
    "                'lambda3' : self.lambda3,\n",
    "                'nu' : self.nu,\n",
    "                'kernel' : self.kernel}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## fix_seed function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_seed(seed):\n",
    "    # Numpy\n",
    "    np.random.seed(seed)\n",
    "    # for built-in random\n",
    "    random.seed(seed)\n",
    "    # for hash seed\n",
    "    os.environ[\"PYTHONHASHSEED\"] = str(seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def plot_scatter(y_obs_list, \n",
    "                 y_prd_list, \n",
    "                 title_list, \n",
    "                 plt_row, \n",
    "                 plt_col, \n",
    "                 position_list, \n",
    "                 col_list,\n",
    "                 alpha_list,\n",
    "                 fig_size, \n",
    "                 save_name, \n",
    "                 title, \n",
    "                 show_flg=True):\n",
    "    fig = plt.figure(figsize=fig_size)\n",
    "\n",
    "    for i_plt in range(len(position_list)):\n",
    "        ax = fig.add_subplot(plt_row, plt_col, position_list[i_plt], \n",
    "                             title=title_list[i_plt], \n",
    "                             xlabel='Observation', \n",
    "                             ylabel='Prediction')\n",
    "        ax.scatter(y_obs_list[i_plt], y_prd_list[i_plt], color=col_list[i_plt], alpha=alpha_list[i_plt])\n",
    "        xy_min = min(ax.get_xlim()[0], ax.get_ylim()[0])\n",
    "        xy_max = max(ax.get_xlim()[1], ax.get_ylim()[1])\n",
    "        ax.axis('equal')\n",
    "        ax.axis('square')\n",
    "        ax.set_xlim([xy_min, xy_max])\n",
    "        ax.set_ylim([xy_min, xy_max])\n",
    "        ax.grid(color='gray', linestyle='dotted', linewidth=1, alpha=0.5)\n",
    "        ax.text(0.03, 0.93, 'Corr : '+str(round(np.corrcoef(y_prd_list[i_plt], y_obs_list[i_plt])[0,1], 4)), size=15, transform=ax.transAxes)\n",
    "        ax.text(0.03, 0.87, 'MSE : '+str(round(mean_squared_error(y_obs_list[i_plt], y_prd_list[i_plt]), 4)), size=15, transform=ax.transAxes)\n",
    "        ax.text(0.03, 0.81, 'MAE : '+str(round(mean_absolute_error(y_obs_list[i_plt], y_prd_list[i_plt]), 4)), size=15, transform=ax.transAxes)\n",
    "        _ = ax.plot([-300, 300], [-300, 300], color='gray', linewidth=0.5)\n",
    "\n",
    "    fig.tight_layout(rect=[0,0,1,0.96])\n",
    "    \n",
    "    plt.suptitle(title,fontsize=20)\n",
    "\n",
    "    fig.savefig(save_name)\n",
    "    if show_flg==False:\n",
    "        plt.close(fig)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Function to avoid zero division"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def avoid_zero(x, tsh=1, _add=0.1):\n",
    "    if np.abs(x) < tsh:\n",
    "        if x >= 0:\n",
    "            return tsh\n",
    "        else:\n",
    "            return -tsh\n",
    "    else:\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Main codes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train_path = '../10_Data/SARCOS_train.csv'\n",
    "# test_path = '../10_Data/SARCOS_test.csv'\n",
    "train_path = '../10_Data/sarcos_inv.mat'\n",
    "test_path = '../10_Data/sarcos_inv_test.mat'\n",
    "axis_names = ['Position1','Position2','Position3','Position4','Position5','Position6','Position7',\n",
    "              'Velocity1','Velocity2','Velocity3','Velocity4','Velocity5','Velocity6','Velocity7',\n",
    "              'Acceleration1','Acceleration2','Acceleration3','Acceleration4','Acceleration5','Acceleration6','Acceleration7',\n",
    "              'Torque1','Torque2','Torque3','Torque4','Torque5','Torque6','Torque7']\n",
    "\n",
    "# sar_train = pd.read_csv(train_path, header=None)\n",
    "# sar_test = pd.read_csv(test_path, header=None)\n",
    "# sar_train = sar_train.set_axis(axis_names, axis=1)\n",
    "# sar_test = sar_test.set_axis(axis_names, axis=1)\n",
    "\n",
    "sar_train_all = io.loadmat(train_path)\n",
    "sar_test = io.loadmat(test_path)\n",
    "sar_train_all = pd.DataFrame(sar_train_all['sarcos_inv'], columns=axis_names)\n",
    "sar_test = pd.DataFrame(sar_test['sarcos_inv_test'], columns=axis_names)\n",
    "sar_train = sar_train_all.iloc[:30000, :]\n",
    "\n",
    "x_train = sar_train.iloc[:,0:21]\n",
    "x_test = sar_test.iloc[:,0:21]\n",
    "y_train_all = sar_train.iloc[:,21:]\n",
    "y_test_all = sar_test.iloc[:,21:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## User parameter setting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fix_seed(373)\n",
    "target_name_list = ['Torque1','Torque2','Torque3','Torque4','Torque5','Torque6','Torque7']\n",
    "n_sample_list = [5, 10, 15, 20, 30, 40, 50]\n",
    "max_itr = 20\n",
    "\n",
    "dim_x = 21\n",
    "n_all = 30000\n",
    "num_SourceTasks = 16\n",
    "\n",
    "# Kernel setting\n",
    "kernel_name = 'rbf'\n",
    "nu_ = 1.5\n",
    "i_seed = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Make training sample ID list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "fix_seed(373)\n",
    "sample_list = list()\n",
    "for n_try in range(max_itr):\n",
    "    fix_seed(n_try)\n",
    "    tmp_list = list(range(n_all))\n",
    "    random.shuffle(tmp_list)\n",
    "    sample_list.append(tmp_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Storing dataframe\n",
    "df_result = pd.DataFrame(columns=['data_name','n_sample','n_itr','type', 'MSE', 'Corr', 'MAE', 'R2'])\n",
    "\n",
    "t0 = time.time()\n",
    "# Repeat for the different number of samples\n",
    "for num_train in n_sample_list:\n",
    "    \n",
    "    # Repeat for the different target torques\n",
    "    for target_name in target_name_list:\n",
    "        \n",
    "        # Predicted data\n",
    "        predicted_data = joblib.load('../30_Output/40_pkl/100_MakeSourceModel/110_Prediction_for_'+target_name+'-'+str(373)+'.pkl')\n",
    "        # s_train = predicted_data['output_train']\n",
    "        # s_test = predicted_data['output_test']\n",
    "        s_train = predicted_data['feature_train']\n",
    "        s_test = predicted_data['feature_test']\n",
    "\n",
    "        # Make target outputs and source features\n",
    "        y_train = y_train_all[target_name].copy()\n",
    "        y_test = y_test_all[target_name].copy()\n",
    "        # s_train = y_train_all[[s for s in target_name_list if s!=target_name]].copy()\n",
    "        # s_test = y_test_all[[s for s in target_name_list if s!=target_name]].copy()\n",
    "        \n",
    "        # Repeat for the different sample splits\n",
    "        for n_itr in range(max_itr):\n",
    "            print(target_name+'   n : '+str(num_train)+',  try : '+str(n_itr))\n",
    "            t1 = time.time()\n",
    "            \n",
    "            # Hyperparameter settings\n",
    "            alpha_list = np.append(10**np.linspace(-4, 10, 9), 0)\n",
    "            gamma_list = np.array([1])\n",
    "            \n",
    "            # Make training data\n",
    "            sample_id = sample_list[n_itr][:num_train]\n",
    "            x_train_tmp = x_train.iloc[sample_id,]\n",
    "            y_train_tmp = y_train.iloc[sample_id,]\n",
    "            s_train_tmp = s_train.iloc[sample_id,]\n",
    "\n",
    "            # Scaling parameters\n",
    "            ## Inputs\n",
    "            x_mean_tmp = x_train_tmp.mean()\n",
    "            x_std_tmp = x_train_tmp.std()\n",
    "            x_train_scal_tmp = (x_train_tmp - x_mean_tmp) / x_std_tmp.replace(0,1)\n",
    "            x_test_scal_tmp = (x_test - x_mean_tmp) / x_std_tmp.replace(0,1)\n",
    "            ## Outputs\n",
    "            y_mean_tmp = y_train_tmp.mean()\n",
    "            y_std_tmp = y_train_tmp.std()\n",
    "            y_train_scal_tmp = (y_train_tmp - y_mean_tmp) / y_std_tmp\n",
    "            y_test_scal_tmp = (y_test - y_mean_tmp) / y_std_tmp\n",
    "            # Source features\n",
    "            s_mean_tmp = s_train_tmp.mean()\n",
    "            s_std_tmp = s_train_tmp.std()\n",
    "            s_train_scal_tmp = (s_train_tmp - s_mean_tmp) / s_std_tmp.replace(0,1)\n",
    "            s_test_scal_tmp = (s_test - s_mean_tmp) / s_std_tmp.replace(0,1)\n",
    "\n",
    "            # Make combined dataframe\n",
    "            x_train_adds = pd.merge(x_train_scal_tmp, s_train_scal_tmp, left_index=True, right_index=True)\n",
    "            x_test_adds = pd.merge(x_test_scal_tmp, s_test_scal_tmp, left_index=True, right_index=True)\n",
    "\n",
    "            # Model training\n",
    "            ## Learn without Transfer\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            gsr_wotl = GridSearchCV(\n",
    "                KernelRidge_HM(),\n",
    "                {'lambda1' : alpha_list,\n",
    "                 'gamma'   : gamma_list/(2*dim_x),\n",
    "                 'nu'      : [nu_],\n",
    "                 'kernel'  : [kernel_name]},\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_wotl.fit(x_train_scal_tmp, y_train_scal_tmp)\n",
    "            model_wotl = KernelRidge_HM(\n",
    "                lambda1 = gsr_wotl.best_params_['lambda1'],                  \n",
    "                gamma   = gsr_wotl.best_params_['gamma'],\n",
    "                nu      = gsr_wotl.best_params_['nu'],\n",
    "                kernel  = kernel_name\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_wotl.fit(x_train_scal_tmp, y_train_scal_tmp)\n",
    "            y_fits_wotl = model_wotl.predict(x_train_scal_tmp)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_wotl = model_wotl.predict(x_test_scal_tmp)*y_std_tmp + y_mean_tmp\n",
    "            print('   Learn without transfer has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Learn using only source features\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            gsr_only = GridSearchCV(\n",
    "                KernelRidge_HM(),\n",
    "                {'lambda1' : alpha_list,\n",
    "                 'gamma'   : gamma_list/(2*num_SourceTasks),\n",
    "                 'nu'      : [nu_],\n",
    "                 'kernel'  : [kernel_name]},\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_only.fit(s_train_scal_tmp, y_train_scal_tmp)\n",
    "            model_only = KernelRidge_HM(\n",
    "                lambda1 = gsr_only.best_params_['lambda1'],                  \n",
    "                gamma   = gsr_only.best_params_['gamma'],\n",
    "                nu      = gsr_only.best_params_['nu'],\n",
    "                kernel  = kernel_name\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            ### Final model training\n",
    "            model_only.fit(s_train_scal_tmp, y_train_scal_tmp)\n",
    "            y_fits_only = model_only.predict(s_train_scal_tmp)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_only = model_only.predict(s_test_scal_tmp)*y_std_tmp + y_mean_tmp\n",
    "            print('   Learn only using the source features has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Learn with source features\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            gsr_with = GridSearchCV(\n",
    "                KernelRidge_HM(),\n",
    "                {'lambda1' : alpha_list,\n",
    "                 'gamma'   : gamma_list/(2*(dim_x+num_SourceTasks)),\n",
    "                 'nu'      : [nu_],\n",
    "                 'kernel'  : [kernel_name]},\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_with.fit(x_train_adds, y_train_scal_tmp)\n",
    "            model_with = KernelRidge_HM(\n",
    "                lambda1 = gsr_with.best_params_['lambda1'],                  \n",
    "                gamma   = gsr_with.best_params_['gamma'],\n",
    "                nu      = gsr_with.best_params_['nu'],\n",
    "                kernel  = kernel_name\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_with.fit(x_train_adds, y_train_scal_tmp)\n",
    "            y_fits_with = model_with.predict(x_train_adds)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_with = model_with.predict(x_test_adds)*y_std_tmp + y_mean_tmp\n",
    "            print('   Learn with the source features has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Learn residuals from only source model\n",
    "            ### Compute the data\n",
    "            t_tmp = time.time()\n",
    "            y_train_res = y_train_tmp - y_fits_only\n",
    "            y_std_res = y_train_res.std()\n",
    "            if y_std_res == 0:\n",
    "                y_std_res = 1\n",
    "            y_mean_res = y_train_res.mean()\n",
    "            y_train_res_scal = (y_train_res - y_mean_res)/y_std_res\n",
    "            ### Grid search\n",
    "            gsr_res = GridSearchCV(\n",
    "                KernelRidge_HM(),\n",
    "                {'lambda1' : alpha_list,\n",
    "                 'gamma'   : gamma_list/(2*dim_x),\n",
    "                 'nu'      : [nu_],\n",
    "                 'kernel'  : [kernel_name]},\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            gsr_res.fit(x_train_scal_tmp, y_train_res_scal)\n",
    "            model_res = KernelRidge_HM(\n",
    "                lambda1 = gsr_res.best_params_['lambda1'],                  \n",
    "                gamma   = gsr_res.best_params_['gamma'],\n",
    "                nu      = gsr_res.best_params_['nu'],\n",
    "                kernel  = kernel_name\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            model_res.fit(x_train_scal_tmp, y_train_res_scal)\n",
    "            y_fits_res = model_res.predict(x_train_scal_tmp)*y_std_res + y_mean_res + y_fits_only\n",
    "            y_pred_res = model_res.predict(x_test_scal_tmp)*y_std_res + y_mean_res + y_pred_only\n",
    "            print('   Learn the residual has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Learn ratio from only source model\n",
    "            t_tmp = time.time()\n",
    "            ### Compute the data\n",
    "            y_fits_only_tmp = pd.Series(y_fits_only).apply(avoid_zero,args=(np.max(np.abs(y_fits_only))/20, 0.1))\n",
    "            y_train_rate = y_train_tmp/y_fits_only_tmp\n",
    "            y_std_rate = y_train_rate.std()\n",
    "            y_mean_rate = y_train_rate.mean()\n",
    "            if y_std_rate == 0:\n",
    "                y_std_rate = 1\n",
    "            y_train_rate_scal = (y_train_rate - y_mean_rate)/y_std_rate\n",
    "            ### Grid search\n",
    "            gsr_rate = GridSearchCV(\n",
    "                KernelRidge_HM(),\n",
    "                {'lambda1' : alpha_list,\n",
    "                 'gamma'   : gamma_list/(2*dim_x),\n",
    "                 'nu'      : [nu_],\n",
    "                 'kernel'  : [kernel_name]},\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_rate.fit(x_train_scal_tmp, y_train_rate_scal)\n",
    "            model_rate = KernelRidge_HM(\n",
    "                lambda1 = gsr_rate.best_params_['lambda1'],                  \n",
    "                gamma   = gsr_rate.best_params_['gamma'],\n",
    "                nu      = gsr_rate.best_params_['nu'],\n",
    "                kernel  = kernel_name\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_rate.fit(x_train_scal_tmp, y_train_rate_scal)\n",
    "            y_fits_rate = (model_rate.predict(x_train_scal_tmp)*y_std_rate + y_mean_rate) * y_fits_only_tmp\n",
    "            y_pred_rate = (model_rate.predict(x_test_scal_tmp)*y_std_rate + y_mean_rate) * y_pred_only#_tmp\n",
    "            print('   Learn the ratio has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Proposed mathod\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            SearchParams_AffineTrans = {\n",
    "                'gamma1'  : [gsr_only.best_params_['gamma']],\n",
    "                'gamma2'  : [gsr_only.best_params_['gamma']],\n",
    "                'gamma3'  : [gsr_wotl.best_params_['gamma']],\n",
    "                'lambda1' : [1e-3, 1e-2, 1e-1, 1],\n",
    "                'lambda2' : [1e-2, 1e-1, 1, 10],\n",
    "                'lambda3' : [1e-2, 1e-1, 1, 10],\n",
    "                'nu'      : [nu_],\n",
    "                'kernel'  : [kernel_name]\n",
    "            }\n",
    "            gsr_AffineTrans = GridSearchCV(\n",
    "                AffineTrans(),\n",
    "                SearchParams_AffineTrans,\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_AffineTrans.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            model_AffineTrans = AffineTrans(\n",
    "                gamma1  = gsr_AffineTrans.best_params_['gamma1'],\n",
    "                gamma2  = gsr_AffineTrans.best_params_['gamma2'],\n",
    "                gamma3  = gsr_AffineTrans.best_params_['gamma3'],\n",
    "                lambda1 = gsr_AffineTrans.best_params_['lambda1'],\n",
    "                lambda2 = gsr_AffineTrans.best_params_['lambda2'],\n",
    "                lambda3 = gsr_AffineTrans.best_params_['lambda3'],\n",
    "                nu      = gsr_AffineTrans.best_params_['nu'],    \n",
    "                kernel  = gsr_AffineTrans.best_params_['kernel']\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_AffineTrans.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            y_fits_AffineTrans = model_AffineTrans.predict(x_train_adds)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_AffineTrans = model_AffineTrans.predict(x_test_adds)*y_std_tmp + y_mean_tmp\n",
    "            print('   Proposed method has been done.    '+str(time.time()-t_tmp))\n",
    "            \n",
    "            ## Proposed method 2\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            SearchParams_AffineTrans2 = {\n",
    "                'gamma1'  : [gsr_only.best_params_['gamma']],\n",
    "                'gamma2'  : [gsr_only.best_params_['gamma']],\n",
    "                'gamma3'  : [gsr_wotl.best_params_['gamma']],\n",
    "                'lambda1' : [0],\n",
    "                'lambda2' : [1e-3, 1e-2, 1e-1, 1, 10],\n",
    "                'lambda3' : [1e-3, 1e-2, 1e-1, 1, 10],\n",
    "                'nu'      : [nu_],\n",
    "                'kernel'  : [kernel_name]\n",
    "            }\n",
    "            gsr_AffineTrans2 = GridSearchCV(\n",
    "                AffineTrans2(),\n",
    "                SearchParams_AffineTrans2,\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_AffineTrans2.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            # Rebuild the estimator from the best hyper-parameters of its own grid search (gsr_AffineTrans2)\n",
    "            model_AffineTrans2 = AffineTrans2(\n",
    "                gamma1  = gsr_AffineTrans2.best_params_['gamma1'],\n",
    "                gamma2  = gsr_AffineTrans2.best_params_['gamma2'],\n",
    "                gamma3  = gsr_AffineTrans2.best_params_['gamma3'],\n",
    "                lambda1 = gsr_AffineTrans2.best_params_['lambda1'],\n",
    "                lambda2 = gsr_AffineTrans2.best_params_['lambda2'],\n",
    "                lambda3 = gsr_AffineTrans2.best_params_['lambda3'],\n",
    "                # nu is read from gsr_AffineTrans2 (previously taken from gsr_AffineTrans, the search of another method)\n",
    "                nu      = gsr_AffineTrans2.best_params_['nu'],\n",
    "                kernel  = gsr_AffineTrans2.best_params_['kernel']\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_AffineTrans2.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            y_fits_AffineTrans2 = model_AffineTrans2.predict(x_train_adds)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_AffineTrans2 = model_AffineTrans2.predict(x_test_adds)*y_std_tmp + y_mean_tmp\n",
    "            print('   Proposed method 2 has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            ## Proposed method 3\n",
    "            t_tmp = time.time()\n",
    "            ### Grid search\n",
    "            SearchParams_AffineTrans3 = {\n",
    "                'gamma1'  : [gsr_only.best_params_['gamma']],\n",
    "                'gamma3'  : [gsr_wotl.best_params_['gamma']],\n",
    "                'lambda1' : [1e-3, 1e-2, 1e-1, 1, 10],\n",
    "                'lambda3' : [1e-3, 1e-2, 1e-1, 1, 10],\n",
    "                'nu'      : [nu_],\n",
    "                'kernel'  : [kernel_name]\n",
    "            }\n",
    "            # Score with negative MSE explicitly, like the other grid searches in this cell;\n",
    "            # without `scoring`, GridSearchCV falls back to the estimator's own score method.\n",
    "            gsr_AffineTrans3 = GridSearchCV(\n",
    "                AffineTrans3(),\n",
    "                SearchParams_AffineTrans3,\n",
    "                scoring = 'neg_mean_squared_error',\n",
    "                cv = 5,\n",
    "                n_jobs = -1,\n",
    "                verbose = False\n",
    "            )\n",
    "            fix_seed(i_seed)\n",
    "            gsr_AffineTrans3.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            # Rebuild the estimator from the best hyper-parameters of its own grid search (gsr_AffineTrans3)\n",
    "            model_AffineTrans3 = AffineTrans3(\n",
    "                gamma1  = gsr_AffineTrans3.best_params_['gamma1'],\n",
    "                gamma3  = gsr_AffineTrans3.best_params_['gamma3'],\n",
    "                lambda1 = gsr_AffineTrans3.best_params_['lambda1'],\n",
    "                lambda3 = gsr_AffineTrans3.best_params_['lambda3'],\n",
    "                # nu is read from gsr_AffineTrans3 (previously taken from gsr_AffineTrans, the search of another method)\n",
    "                nu      = gsr_AffineTrans3.best_params_['nu'],\n",
    "                kernel  = gsr_AffineTrans3.best_params_['kernel']\n",
    "            )\n",
    "            ### Final model training\n",
    "            fix_seed(i_seed)\n",
    "            model_AffineTrans3.fit(X=x_train_adds, y=y_train_scal_tmp)\n",
    "            y_fits_AffineTrans3 = model_AffineTrans3.predict(x_train_adds)*y_std_tmp + y_mean_tmp\n",
    "            y_pred_AffineTrans3 = model_AffineTrans3.predict(x_test_adds)*y_std_tmp + y_mean_tmp\n",
    "            print('   Proposed method 3 has been done.    '+str(time.time()-t_tmp))\n",
    "\n",
    "            # Save results\n",
    "            ## Plot\n",
    "            if not os.path.isdir('../30_Output/20_Plot/300_TransferLearning/'+target_name+'/n'+str(num_train)):\n",
    "                os.makedirs('../30_Output/20_Plot/300_TransferLearning/'+target_name+'/n'+str(num_train))\n",
    "            plot_scatter(y_obs_list = [y_train_tmp, y_train_tmp, y_train_tmp, y_train_tmp, y_train_tmp, y_train_tmp, y_train_tmp, y_train_tmp,\n",
    "                                      y_test, y_test, y_test, y_test, y_test, y_test, y_test, y_test],\n",
    "                        y_prd_list = [y_fits_wotl, y_fits_only, y_fits_with, y_fits_res, y_fits_rate, y_fits_AffineTrans, y_fits_AffineTrans2, y_fits_AffineTrans3,\n",
    "                                     y_pred_wotl, y_pred_only, y_pred_with, y_pred_res, y_pred_rate, y_pred_AffineTrans, y_pred_AffineTrans2, y_pred_AffineTrans3],\n",
    "                        title_list = ['Without transfer (train)', \n",
    "                                      'Using only source features (train)', \n",
    "                                      'With source features (train)', \n",
    "                                      'Learning the residual (train)', \n",
    "                                      'Learning the ratio (train)', \n",
    "                                      'Proposed method (train)',\n",
    "                                      'Proposed method 2 (train)',\n",
    "                                      'Proposed method 3 (train)',\n",
    "                                      'Without transfer (test)', \n",
    "                                      'Using only source features (test)', \n",
    "                                      'With source features (test)', \n",
    "                                      'Learning the residual (test)', \n",
    "                                      'Learning the ratio (test)', \n",
    "                                      'Proposed method (test)',\n",
    "                                      'Proposed method 2 (test)',\n",
    "                                      'Proposed method 3 (test)'],\n",
    "                        plt_row = 4,\n",
    "                        plt_col = 4,\n",
    "                        position_list = [1, 3, 5, 7, 9, 11, 13, 15,\n",
    "                                         2, 4, 6, 8, 10, 12, 14, 16],\n",
    "                        col_list = ['steelblue','steelblue','steelblue','steelblue','steelblue','steelblue','steelblue','steelblue',\n",
    "                                    'tomato', 'tomato', 'tomato', 'tomato', 'tomato', 'tomato', 'tomato', 'tomato'],\n",
    "                        alpha_list = [1,1,1,1,1,1,1,1,\n",
    "                                      0.1, 0.1, 0.1, 0.1, 0.1, 0.1,0.1,0.1],\n",
    "                        fig_size = (20,20),\n",
    "                        save_name = '../30_Output/20_Plot/300_TransferLearning/'+target_name+'/n'+str(num_train)+'/'+'301_'+target_name+'_n'+str(num_train)+'_'+str(n_itr)+'.png',\n",
    "                        title=target_name+',   n : '+str(num_train)+',  try : '+str(n_itr),\n",
    "                        show_flg=False)\n",
    "\n",
    "            ## Dataframe\n",
    "            # Append one row of test-set metrics per method (fixed order), then write the table to CSV.\n",
    "            res_cols = ['data_name','n_sample','n_itr','type', 'MSE', 'Corr', 'MAE', 'R2']\n",
    "            res_index_prefix = target_name+'_n'+str(num_train)+'_itr'+str(n_itr)+'_'\n",
    "            # (index suffix, value of the 'type' column, test predictions)\n",
    "            res_methods = [\n",
    "                ('WOTL',         'Without transfer',           y_pred_wotl),\n",
    "                ('ONLY',         'Using only source features', y_pred_only),\n",
    "                ('WITH',         'With source features',       y_pred_with),\n",
    "                ('RES',          'Leraning the residuals',     y_pred_res),\n",
    "                ('RATE',         'Learning the rates',         y_pred_rate),\n",
    "                ('AffineTrans',  'Proposed method',            y_pred_AffineTrans),\n",
    "                ('AffineTrans2', 'Proposed method 2',          y_pred_AffineTrans2),\n",
    "                ('AffineTrans3', 'Proposed method 3',          y_pred_AffineTrans3),\n",
    "            ]\n",
    "            for res_tag, res_label, res_pred in res_methods:\n",
    "                # np.array on this mixed list yields a single-dtype array, exactly as in the per-method stanzas it replaces\n",
    "                res_row = np.array([\n",
    "                    target_name,\n",
    "                    num_train,\n",
    "                    n_itr,\n",
    "                    res_label,\n",
    "                    mean_squared_error(y_test, res_pred),\n",
    "                    np.corrcoef(y_test, res_pred)[0,1],\n",
    "                    mean_absolute_error(y_test, res_pred),\n",
    "                    r2_score(y_test, res_pred)\n",
    "                ]).reshape(1, -1)\n",
    "                df_result = pd.concat(\n",
    "                    [df_result, pd.DataFrame(res_row, columns=res_cols, index=[res_index_prefix+res_tag])],\n",
    "                    axis=0\n",
    "                )\n",
    "            # Written on every iteration so partial results are on disk while the loop is still running\n",
    "            df_result.to_csv('../30_Output/30_csv/300_TransferLearning_Result_ext.csv')\n",
    "            \n",
    "            clear_output(True)\n",
    "            print(time.time()-t1, ' / ', time.time()-t0)\n",
    "clear_output(True)\n",
    "print(time.time()-t0)\n",
    "print('*** Succeeded ***')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
