{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Python packages used in this code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import pickle\n",
    "import sklearn\n",
    "import platform\n",
    "import sys\n",
    "from sklearn.base import BaseEstimator, RegressorMixin\n",
    "import seaborn as sns\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Environments\n",
    "\n",
    "--Platform--\n",
    "OS : Windows-10-10.0.19044-SP0\n",
    "--Version--\n",
    "python :  3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]\n",
    "numpy : 1.23.1\n",
    "pandas : 1.4.3\n",
    "sklearn : 1.1.1\n",
    "seaborn : 0.11.2\n",
    "\"\"\"\n",
    "\n",
    "print('--Platform--')\n",
    "print('OS :', platform.platform())\n",
    "print('--Version--')\n",
    "print('python : ', sys.version)\n",
    "print('numpy :', np.__version__)\n",
    "print('pandas :', pd.__version__)\n",
    "print('sklearn :', sklearn.__version__)\n",
    "print('seaborn :', sns.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create output directories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.path.isdir('../30_Output/10_Model/400_MakeResult'):\n",
    "    os.makedirs('../30_Output/10_Model/400_MakeResult')\n",
    "if not os.path.isdir('../30_Output/20_Plot/400_MakeResult'):\n",
    "    os.makedirs('../30_Output/20_Plot/400_MakeResult')\n",
    "if not os.path.isdir('../30_Output/30_csv/400_MakeResult'):\n",
    "    os.makedirs('../30_Output/30_csv/400_MakeResult')\n",
    "if not os.path.isdir('../30_Output/40_pkl/400_MakeResult'):\n",
    "    os.makedirs('../30_Output/40_pkl/400_MakeResult')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class cls_FusedRidge(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, lambda_scale=1, lambda_trend=1):\n",
    "        \"\"\"\n",
    "        Define the model class for log-difference model\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            lambda_scale : reguralization parameter for the scale of gamma\n",
    "            lambda_trend : reguralization parameter for the trend of gamma\n",
    "        \"\"\"\n",
    "        self.lambda_scale = lambda_scale\n",
    "        self.lambda_trend = lambda_trend\n",
    "    \n",
    "    def make_D(self, n_features):\n",
    "        trend_matrix = np.eye(n_features - 1, n_features, k=1) - np.eye(n_features - 1, n_features)\n",
    "        trend_matrix[[9, 29, 49, 69, 89, 109, 129, 149, 169]] = np.zeros(n_features)\n",
    "        \n",
    "        if self.lambda_scale == 0:\n",
    "            return self.lambda_trend * trend_matrix\n",
    "        elif self.lambda_trend == 0:\n",
    "            return self.lambda_scale * np.identity(n_features)\n",
    "        else:\n",
    "            generated = np.vstack([self.lambda_scale * np.identity(n_features),\n",
    "                                   self.lambda_trend * trend_matrix])\n",
    "            return generated\n",
    "        \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "\n",
    "        \"\"\"\n",
    "        # dataset\n",
    "        self.X = X\n",
    "        self.y = y\n",
    "        \n",
    "        # dimension\n",
    "        self.n_sample, self.dim_X = self.X.shape\n",
    "        \n",
    "        # Matrices\n",
    "        self.D = self.make_D(self.dim_X)\n",
    "        self.D2 = np.transpose(self.D).dot(self.D)\n",
    "        \n",
    "        # Fit\n",
    "        self.intercept = np.mean(self.y)\n",
    "        self.theta = np.linalg.pinv(self.X.T.dot(self.X) + self.D2).dot(self.X.T).dot(self.y-self.intercept)\n",
    "\n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "        \"\"\"\n",
    "        return X.dot(self.theta) + self.intercept\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "        \"\"\"\n",
    "        return {'lambda_scale' : self.lambda_scale,\n",
    "                'lambda_trend' : self.lambda_trend}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class cls_InvTrans_FR(BaseEstimator, RegressorMixin):\n",
    "    def __init__(self, lambda1=1, lambda2=1, lambda3_scale=1, lambda3_trend=1):\n",
    "        \"\"\"\n",
    "        Define the model class proposed in the paper\n",
    "            h(x) = \\alpha_1*ys + \\alpha_0 + (\\beta*ys + 1)<\\gamma, x>\n",
    "                x  : discriptor\n",
    "                ys : CP(MD)\n",
    "        \n",
    "        Parameters\n",
    "        ----------\n",
    "            lambda1       : regularization parameter for alpha\n",
    "            lambda2       : regularization parameter for beta\n",
    "            lambda3_scale : reguralization parameter for the scale of gamma\n",
    "            lambda3_trend : reguralization parameter for the trend of gamma\n",
    "        \"\"\"\n",
    "        self.lambda1 = lambda1\n",
    "        self.lambda2 = lambda2\n",
    "        self.lambda3_scale = lambda3_scale\n",
    "        self.lambda3_trend = lambda3_trend\n",
    "    \n",
    "    def estimation_alpha(self):\n",
    "        \"\"\"\n",
    "        Optimization with respect to alpha\n",
    "            [\\hat{\\alpha}_1, \\hat{\\alpha}_2]^T \n",
    "                = (Ys^TYs + n\\Lambda_1)^{-1} Ys^T (y + (\\beta*ys+1)*<\\gamma, x>)\n",
    "            \n",
    "                    Ys = | ys_1  1 | \\in R^{n*2}, \\Lambda_1 = | \\lambda1  0 |, ys = | ys_1 |\n",
    "                         | ys_2  1 |                          |        0  0 |       | ys_2 |\n",
    "                              :                                                     |   :  |\n",
    "                         | ys_n  1 |                                                | ys_n |\n",
    "                         \n",
    "        Note that the regularization applies only to \\alpha_1 and not to the intercept \\alpha_1.\n",
    "        \"\"\"\n",
    "        self.alpha = self.InvMat.dot(self.X_source1.T).dot(self.y + (self.Mat2+1)*(self.Mat3))\n",
    "        self.Mat1 = self.X_source1.dot(self.alpha)\n",
    "        self.result_alpha[self.i_count] = self.alpha\n",
    "        return self\n",
    "    \n",
    "    def estimation_beta(self):\n",
    "        \"\"\"\n",
    "        Optimization with respect to beta\n",
    "            \\hat{\\beta}\n",
    "                = -(ys^T Diag(X\\gamma)^2 ys + n\\lambda_2)^{-1} ys^T Diag(X\\gamma) (y - Ys\\alpha + X\\gamma)\n",
    "        \"\"\"\n",
    "        tmp_mat1 = np.linalg.pinv(self.X_source2.T.dot(np.diag(self.Mat3)).dot(np.diag(self.Mat3)).values.dot(self.X_source2.values) + self.n_sample*self.lambda2*np.diag(np.ones(self.dim_X_source2)), hermitian=True)\n",
    "        self.beta = -tmp_mat1.dot(self.X_source2.T).dot(np.diag(self.Mat3)).dot(self.y-self.Mat1+self.Mat3)\n",
    "        self.Mat2 = self.X_source2.dot(self.beta)\n",
    "        self.result_beta[self.i_count] = self.beta\n",
    "        return self\n",
    "    \n",
    "    def estimation_gamma(self):\n",
    "        \"\"\"\n",
    "        Optimization with respect to gamma\n",
    "        \"\"\"\n",
    "        tmp_x = pd.DataFrame(np.diag(self.X_source2.values.reshape(-1)*self.beta+1).dot(self.X), index=self.X.index, columns=self.X.columns)\n",
    "        tmp_y = self.y-self.Mat1\n",
    "        \n",
    "        self.tmp_x = tmp_x\n",
    "        self.tmp_y = tmp_y\n",
    "                \n",
    "        fix_seed(373)\n",
    "        self.gamma = -np.linalg.pinv(tmp_x.T.dot(tmp_x) + self.D2).dot(tmp_x.T).dot(tmp_y)\n",
    "        \n",
    "        self.Mat3 = self.X.dot(self.gamma)\n",
    "        self.result_gamma[self.i_count] = self.gamma\n",
    "        return self\n",
    "    \n",
    "    def make_diff(self, w_new, w_old):\n",
    "        \"\"\"\n",
    "        Function to calculate parameter changes for algorithm convergence determination\n",
    "        We use \\max{|w_new - w_old|}/\\max{|w_old|} for determining the convergence.\n",
    "        This criterion is used in some algorithms in scikit-learn, for example, see https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html\n",
    "        We apply this criterion to each of \\alpha, \\beta and \\gamma separately and use their maximum value for the convergence decision.\n",
    "        \"\"\"\n",
    "        diff1 = np.max(np.abs(w_new-w_old))\n",
    "        diff2 = np.max(np.abs(w_old))\n",
    "        if diff2 < 1e-10: #Avoiding division by zero\n",
    "            out = 0\n",
    "        else:\n",
    "            out = diff1/diff2\n",
    "        return out\n",
    "        \n",
    "    def make_D(self, n_features):\n",
    "        trend_matrix = np.eye(n_features - 1, n_features, k=1) - np.eye(n_features - 1, n_features)\n",
    "        trend_matrix[[9, 29, 49, 69, 89, 109, 129, 149, 169]] = np.zeros(n_features)\n",
    "        \n",
    "        if self.lambda3_scale == 0:\n",
    "            return self.lambda3_trend * trend_matrix\n",
    "        elif self.lambda3_trend == 0:\n",
    "            return self.lambda3_scale * np.identity(n_features)\n",
    "        else:\n",
    "            generated = np.vstack([self.lambda3_scale * np.identity(n_features),\n",
    "                                   self.lambda3_trend * trend_matrix])\n",
    "            return generated\n",
    "        \n",
    "    def fit(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Model fitting\n",
    "        \n",
    "        Required grobal variables\n",
    "        -----------------------\n",
    "            dim_x         : dimension of the discriptor\n",
    "            ini_alpha     : initial value for \\alpha_1\n",
    "            ini_intercept : initial value for \\alpha_0\n",
    "            ini_beta      : initial value for \\beta\n",
    "            ini_gamma     : initial value for \\gamma\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            i_count       : counter for the iterations\n",
    "            n_loop        : maximum number of iterations\n",
    "            convergence   : flag indicating whether the algorithm has converged before the maximum iteration\n",
    "            error         : flag indication whether the algorithm has terminated with an error\n",
    "            \n",
    "            Input         : X=[discriptors, source features], y=output\n",
    "            X             : descriptors\n",
    "            X_source1     : source features + all-one vector\n",
    "            X_source2     : source features\n",
    "            y             : output\n",
    "            n_sample      : number of sumples\n",
    "            dim_X         : dimension of the discriptor\n",
    "            dim_X_source1 : dimension of the source features + intercept\n",
    "            dim_X_source2 : dimension of the source features\n",
    "\n",
    "            result_alpha  : dataframe to store alpha in all iterations\n",
    "            result_beta   : dataframe to store beta in all iterations\n",
    "            result_gamma  : dataframe to store gamma in all iterations\n",
    "            diff_i        : series to store the difference between \\alpha_0_new and \\alpha_0_old \n",
    "            diff_a        : series to store the difference between \\alpha_1_new and \\alpha_1_old \n",
    "            diff_b        : series to store the difference between \\beta_new and \\beta_old\n",
    "            diff_c        : series to store the difference between \\gamma_new and \\gamma_old \n",
    "            diff          : series to store the difference between AllParams_new and AllParams_old \n",
    "\n",
    "            InvMat        : (Ys^TYs + n\\Lambda_1)^{-1}\n",
    "            Mat1          : Ys\\alpha (updated with every update of \\alpha)\n",
    "            Mat2          : ys\\beta (updated with every update of \\alpha)\n",
    "            Mat3          : X\\gamma (updated with every update of \\alpha)\n",
    "        \"\"\"\n",
    "        # setting\n",
    "        self.i_count = 0        \n",
    "        self.n_loop = 1000\n",
    "        self.convergence = False\n",
    "        self.error = False\n",
    "        \n",
    "        # dataset\n",
    "        self.X = X.iloc[:,:dim_x]\n",
    "        self.X_source1 = X.iloc[:,dim_x:].copy()\n",
    "        self.X_source1['Intercept'] = 1\n",
    "        self.X_source2 = X.iloc[:,dim_x:].copy()\n",
    "        self.y = y\n",
    "        \n",
    "        # dimension\n",
    "        self.n_sample, self.dim_X = self.X.shape\n",
    "        self.dim_X_source1 = self.X_source1.shape[1]\n",
    "        self.dim_X_source2 = self.X_source2.shape[1]\n",
    "        \n",
    "        # for storing the results\n",
    "        self.result_alpha = np.zeros([self.n_loop+1, self.dim_X_source1])\n",
    "        self.result_beta = np.zeros([self.n_loop+1, self.dim_X_source2])\n",
    "        self.result_gamma = np.zeros([self.n_loop+1, self.dim_X])\n",
    "        self.diff_i = np.zeros(self.n_loop+1)\n",
    "        self.diff_a = np.zeros(self.n_loop+1)\n",
    "        self.diff_b = np.zeros(self.n_loop+1)\n",
    "        self.diff_c = np.zeros(self.n_loop+1)\n",
    "        self.diff = np.zeros(self.n_loop+1)\n",
    "        \n",
    "        # initialize the parameters\n",
    "        self.alpha = np.array([ini_alpha, ini_intercept])\n",
    "        self.beta = np.array([ini_beta])\n",
    "        self.gamma = -ini_gamma\n",
    "        self.result_alpha[0] = self.alpha\n",
    "        self.result_beta[0] = self.beta\n",
    "        self.result_gamma[0] = self.gamma\n",
    "        self.diff[0] = np.nan\n",
    "        \n",
    "        # Matrices\n",
    "        self.InvMat = np.linalg.pinv(self.X_source1.T.dot(self.X_source1) + self.n_sample*self.lambda1*np.diag(np.ones(self.dim_X_source1-1).tolist()+[0]), hermitian=True)\n",
    "        self.Mat1 = self.X_source1.dot(self.alpha)\n",
    "        self.Mat2 = self.X_source2.dot(self.beta)\n",
    "        self.Mat3 = self.X.dot(self.gamma)\n",
    "        self.D = self.make_D(self.dim_X)\n",
    "        self.D2 = np.transpose(self.D).dot(self.D)\n",
    "\n",
    "        # try:\n",
    "        # Repeat until convergence\n",
    "        for i_loop in range(self.n_loop):\n",
    "            self.i_count += 1\n",
    "\n",
    "            # Update\n",
    "            self.estimation_alpha()\n",
    "            self.estimation_beta()\n",
    "            self.estimation_gamma()\n",
    "\n",
    "            # Compute changes of parameters\n",
    "            diff_i = self.make_diff(w_new=self.result_alpha[self.i_count][self.dim_X_source1-1], w_old=self.result_alpha[self.i_count-1][self.dim_X_source1-1])\n",
    "            diff_a = self.make_diff(w_new=self.result_alpha[self.i_count][:(self.dim_X_source1-1)], w_old=self.result_alpha[self.i_count-1][:(self.dim_X_source1-1)])\n",
    "            diff_b = self.make_diff(w_new=self.result_beta[self.i_count], w_old=self.result_beta[self.i_count-1])\n",
    "            diff_c = self.make_diff(w_new=self.result_gamma[self.i_count], w_old=self.result_gamma[self.i_count-1])\n",
    "            diff = np.max([diff_i, diff_a, diff_b, diff_c]) # We use the maximum value of {diff_i, diff_a, diff_b, diff_c}.\n",
    "            # Store\n",
    "            self.diff_i[self.i_count] = diff_i\n",
    "            self.diff_a[self.i_count] = diff_a\n",
    "            self.diff_b[self.i_count] = diff_b\n",
    "            self.diff_c[self.i_count] = diff_c\n",
    "            self.diff[self.i_count] = diff\n",
    "\n",
    "            # Check the convergence\n",
    "            if diff < 1e-3:\n",
    "                self.convergence = True\n",
    "                break\n",
    "        # except:\n",
    "            # self.error = True\n",
    "        \n",
    "        # Cut off the unused parts of the dataframes\n",
    "        self.result_alpha = self.result_alpha[:(self.i_count+1),:]\n",
    "        self.result_beta = self.result_beta[:(self.i_count+1),:]\n",
    "        self.result_gamma = self.result_gamma[:(self.i_count+1),:]\n",
    "        self.diff_i = self.diff_i[:(self.i_count+1)]\n",
    "        self.diff_a = self.diff_a[:(self.i_count+1)]\n",
    "        self.diff_b = self.diff_b[:(self.i_count+1)]\n",
    "        self.diff_c = self.diff_c[:(self.i_count+1)]\n",
    "        self.diff = self.diff[:(self.i_count+1)]\n",
    "        \n",
    "        return self\n",
    "    \n",
    "    def predict(self, X):\n",
    "        \"\"\"\n",
    "        Prediction function\n",
    "            h(x) = \\alpha_1*ys + \\alpha_0 + (\\beta*ys + 1)<\\gamma, x>\n",
    "            \n",
    "        Returns\n",
    "        -------\n",
    "            pred1  : \\alpha_1*ys + \\alpha_0\n",
    "            pred2  : \\beta*ys + 1\n",
    "            pred3  : <\\gamma, x>\n",
    "            \n",
    "            y_pred : \\alpha_1*ys + \\alpha_0 + (\\beta*ys + 1)<\\gamma, x>\n",
    "        \"\"\"\n",
    "        # dataset\n",
    "        X_source_pred1 = X.iloc[:,dim_x:].copy()\n",
    "        X_source_pred1['Intercept'] = 1\n",
    "        X_source_pred2 = X.iloc[:,dim_x:].copy()\n",
    "        X_pred = X.iloc[:,:dim_x]\n",
    "        \n",
    "        # Compute each term\n",
    "        self.pred1 = X_source_pred1.dot(self.alpha)\n",
    "        self.pred2 = X_source_pred2.dot(self.beta) + 1\n",
    "        self.pred3 = X_pred.dot(self.gamma)\n",
    "        y_pred = self.pred1 - self.pred2*self.pred3\n",
    "        \n",
    "        return y_pred\n",
    "\n",
    "    def score(self, X, y=None):\n",
    "        \"\"\"\n",
    "        Score function for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            -\\sum(y-\\hat{y})/n (Consider the minus value because 'GridSearchCV' maximize the score.)\n",
    "        \"\"\"\n",
    "        return -sum((y.values - self.predict(X).values)**2)/self.n_sample\n",
    "    \n",
    "    def get_params(self, deep=True):\n",
    "        \"\"\"\n",
    "        Create parameter dictionary for cross-validation\n",
    "        \n",
    "        Returns\n",
    "        -------\n",
    "            {'lambda1', 'lambda2', 'lambda3', 'l1_ratio'}\n",
    "        \"\"\"\n",
    "        return {'lambda1' : self.lambda1,\n",
    "                'lambda2' : self.lambda2,\n",
    "                'lambda3_scale' : self.lambda3_scale,\n",
    "                'lambda3_trend' : self.lambda3_trend}\n",
    "    \n",
    "    def set_params(self, **parameters):\n",
    "        \"\"\"\n",
    "        For cross-validation\n",
    "        \"\"\"\n",
    "        for parameter, value in parameters.items():\n",
    "            setattr(self, parameter, value)\n",
    "        return self     "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Main codes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_itr = 20"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('../30_Output/40_pkl/100_CheckData/100_Data.pkl', 'rb') as f:\n",
    "    data_list = pickle.load(f)\n",
    "x = data_list['x']\n",
    "y = data_list['y']\n",
    "ys = data_list['ys']\n",
    "PID = data_list['PID']\n",
    "dim_x = x.shape[1]\n",
    "\n",
    "# Scaling parameter\n",
    "#     For stability of the estimation, scaling parameters are calculated using all data.\n",
    "x_Mean = x.mean()\n",
    "x_Std = x.std()\n",
    "y_LogMean = np.log(y).mean()\n",
    "y_LogStd = np.log(y).std()\n",
    "ys_LogMean = np.log(ys).mean()\n",
    "ys_LogStd = np.log(ys).std()\n",
    "\n",
    "# Training results\n",
    "result_dict = dict()\n",
    "for i_itr in range(num_itr):\n",
    "    with open('../30_Output/40_pkl/300_TransferLearning/300_Results_'+str(i_itr)+'.pkl', 'rb') as f:\n",
    "        result_dict[i_itr] = pickle.load(f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare a data frame for plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root mean squared error\n",
    "df_rmse = pd.DataFrame(columns=['Itr', 'Type', 'Root mean squared error'])\n",
    "for i_itr in range(num_itr):\n",
    "    tmp_mse_slr = np.mean((np.log(result_dict[i_itr]['y_test'])-result_dict[i_itr]['y_pred_slr'])**2)\n",
    "    tmp_mse_diff = np.mean((np.log(result_dict[i_itr]['y_test'])-result_dict[i_itr]['y_pred_diff'])**2)\n",
    "    tmp_mse_inv = np.mean((np.log(result_dict[i_itr]['y_test'])-result_dict[i_itr]['y_pred_inv'])**2)\n",
    "    df_rmse = pd.concat([df_rmse, pd.DataFrame(np.array([i_itr, 'Simple linear regression', np.sqrt(tmp_mse_slr)]).reshape(1, -1), columns=['Itr', 'Type', 'Root mean squared error'])], axis=0)\n",
    "    df_rmse = pd.concat([df_rmse, pd.DataFrame(np.array([i_itr, 'Regression on the log-difference', np.sqrt(tmp_mse_diff)]).reshape(1, -1), columns=['Itr', 'Type', 'Root mean squared error'])], axis=0)\n",
    "    df_rmse = pd.concat([df_rmse, pd.DataFrame(np.array([i_itr, 'Proposed method', np.sqrt(tmp_mse_inv)]).reshape(1, -1), columns=['Itr', 'Type', 'Root mean squared error'])], axis=0)\n",
    "df_rmse = df_rmse.astype({'Root mean squared error': 'float64'})\n",
    "df_rmse = df_rmse.replace('Regression on the log-difference', 'Regression to the log-difference')\n",
    "\n",
    "# Parameters of single linear regression\n",
    "df_params_slr = pd.DataFrame(columns=['Itr', 'Coefficient', 'Intercept'])\n",
    "for i_itr in range(num_itr):\n",
    "    df_params_slr.loc[i_itr] = [i_itr, result_dict[i_itr]['model_slr'].coef_[0], result_dict[i_itr]['model_slr'].intercept_]\n",
    "    \n",
    "# Parameters of learning the log-difference model\n",
    "df_params_diff = pd.DataFrame(columns=['Itr', 'Intercept']+x.columns.values.tolist())\n",
    "for i_itr in range(num_itr):\n",
    "    df_params_diff.loc[i_itr] = [i_itr, result_dict[i_itr]['model_diff'].intercept]+result_dict[i_itr]['model_diff'].theta.tolist()\n",
    "    \n",
    "# Parameters of proposed method model\n",
    "df_params_inv = pd.DataFrame(columns=['Itr', 'alpha', 'Intercept', 'beta']+x.columns.values.tolist())\n",
    "for i_itr in range(num_itr):\n",
    "    df_params_inv.loc[i_itr] = [i_itr, result_dict[i_itr]['model_inv'].alpha[0], result_dict[i_itr]['model_inv'].alpha[1], result_dict[i_itr]['model_inv'].beta[0]]+result_dict[i_itr]['model_inv'].gamma.tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-- Mean --\n",
      "                                  Root mean squared error  \\\n",
      "Type                                                        \n",
      "Simple linear regression                         0.140309   \n",
      "Regression to the log-difference                 0.136789   \n",
      "Proposed method                                  0.135721   \n",
      "\n",
      "                                  Root mean squared error  \n",
      "Type                                                       \n",
      "Simple linear regression                         0.046056  \n",
      "Regression to the log-difference                 0.042652  \n",
      "Proposed method                                  0.041733  \n"
     ]
    }
   ],
   "source": [
    "# Mean - std\n",
    "tmp1 = df_rmse.groupby('Type').mean().loc[['Simple linear regression','Regression to the log-difference','Proposed method'], ['Root mean squared error']].rename(columns={'RMSE':'Mean'})\n",
    "tmp2 = df_rmse.groupby('Type').std().loc[['Simple linear regression','Regression to the log-difference','Proposed method'], ['Root mean squared error']].rename(columns={'RMSE':'Std'})\n",
    "print('-- Mean --')\n",
    "print(pd.concat([tmp1, tmp2], axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_params_abc = df_params_inv[['alpha', 'beta', 'Intercept']].copy()\n",
    "df_params_abc['alpha_rescaled'] = df_params_abc['alpha']/ys_LogStd\n",
    "df_params_abc['beta_rescaled'] = df_params_abc['beta']/ys_LogStd\n",
    "df_params_abc['Intercept_rescaled'] = df_params_abc['Intercept'] - df_params_abc['alpha']*ys_LogMean/ys_LogStd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean\n",
    "print('-- Mean --')\n",
    "print(df_params_abc[['alpha_rescaled', 'beta_rescaled', 'Intercept_rescaled']].melt().groupby(['variable']).mean())\n",
    "# Std\n",
    "print('')\n",
    "print('-- Std --')\n",
    "print(df_params_abc[['alpha_rescaled', 'beta_rescaled', 'Intercept_rescaled']].melt().groupby(['variable']).std())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_palette=sns.color_palette(\n",
    "     [sns.color_palette(\"Blues_d\", 24)[15]]*10\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[5]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[15]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[5]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[15]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[5]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[15]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[5]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[15]]*20\n",
    "    +[sns.color_palette(\"Blues_d\", 24)[5]]*20\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tmp = df_params_inv.iloc[:,4:].mean()\n",
    "tmp2 = pd.DataFrame(tmp, columns=['Value'])\n",
    "tmp2['Type'] = tmp.index.values\n",
    "tmp3_inv = tmp2.sort_values(by='Value', ascending=False)\n",
    "\n",
    "data_plt = df_params_inv.iloc[:,4:].melt()\n",
    "data_plt['value_neg'] = data_plt['value']\n",
    "\n",
    "fig, ax = plt.subplots(1, 1, figsize=(40, 10))\n",
    "# plt.ylim(10, 40)\n",
    "graph = sns.barplot(y='value_neg', x='variable', data=data_plt,ax=ax, ci=None, palette=col_palette)#color='steelblue')\n",
    "list_vline = [9.5, 29.5, 49.5, 69.5, 89.5, 109.5, 129.5, 149.5, 169.5]\n",
    "for i_vline in range(9):\n",
    "    graph.axvline(list_vline[i_vline], alpha=0.7, color='gray', linestyle='--', linewidth=1)\n",
    "for i_hline in [0.003, 0.002, 0.001, -0.001, -0.002, -0.003]:\n",
    "    graph.axhline(i_hline, alpha=0.3, color='gray', linestyle='--', linewidth=1)\n",
    "graph.set_ylim([-0.0035, 0.0035])\n",
    "ax.set_ylabel('Value', size=25)\n",
    "ax.set_xlabel('')\n",
    "\n",
    "plt.text(4.5,   -0.005, 'mass', size=25, ha='center')\n",
    "plt.text(19.5,  -0.005, 'charge', size=25, ha='center')\n",
    "plt.text(39.5,  -0.005, r'$\\epsilon$', size=25, ha='center')\n",
    "plt.text(59.5,  -0.005, r'$\\sigma$', size=25, ha='center')\n",
    "plt.text(79.5,  -0.005, r'$K_{\\rm bond}$', size=25, ha='center')\n",
    "plt.text(99.5,  -0.005, r'$r_0$', size=25, ha='center')\n",
    "plt.text(119.5, -0.005, 'polar', size=25, ha='center')\n",
    "plt.text(139.5, -0.005, r'$K_{\\rm angle}$', size=25, ha='center')\n",
    "plt.text(159.5, -0.005, r'$\\theta_0$', size=25, ha='center')\n",
    "plt.text(179.5, -0.005, r'$K_{\\rm dih}$', size=25, ha='center')\n",
    "\n",
    "for line_x in [-0.5]+list_vline+[189.5]:\n",
    "    plt.annotate('', xy=(line_x, -0.0035), xytext=(line_x, -0.0055), annotation_clip=False, arrowprops=dict(arrowstyle='-', color='gray', alpha=0.5))\n",
    "plt.xticks(rotation=90)\n",
    "plt.tight_layout(rect=[0,0.1,1,1])\n",
    "plt.savefig('../30_Output/20_Plot/400_MakeResult/434_Barplot_INV.pdf')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
