{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bearing-contrast",
   "metadata": {},
   "source": [
    "# DeepTLF \n",
    "\n",
    "PyTorch Implementation\n",
    "### DeepTLF follows the sckit-learn API. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "athletic-prague",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:24:30.894959Z",
     "iopub.status.busy": "2021-10-05T18:24:30.894643Z",
     "iopub.status.idle": "2021-10-05T18:24:31.011657Z",
     "shell.execute_reply": "2021-10-05T18:24:31.011057Z",
     "shell.execute_reply.started": "2021-10-05T18:24:30.894929Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/john/Documents/Projects/2021/DeepTLF/to_submit\n"
     ]
    }
   ],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "thermal-garlic",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:02.337171Z",
     "iopub.status.busy": "2021-10-05T18:25:02.336819Z",
     "iopub.status.idle": "2021-10-05T18:25:02.340330Z",
     "shell.execute_reply": "2021-10-05T18:25:02.339781Z",
     "shell.execute_reply.started": "2021-10-05T18:25:02.337139Z"
    }
   },
   "outputs": [],
   "source": [
    "import sys \n",
    "sys.path.append('./new')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "cognitive-sentence",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:02.830790Z",
     "iopub.status.busy": "2021-10-05T18:25:02.830552Z",
     "iopub.status.idle": "2021-10-05T18:25:02.834568Z",
     "shell.execute_reply": "2021-10-05T18:25:02.833627Z",
     "shell.execute_reply.started": "2021-10-05T18:25:02.830766Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.datasets import load_breast_cancer\n",
    "\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "premium-library",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:03.138795Z",
     "iopub.status.busy": "2021-10-05T18:25:03.138544Z",
     "iopub.status.idle": "2021-10-05T18:25:03.441814Z",
     "shell.execute_reply": "2021-10-05T18:25:03.441154Z",
     "shell.execute_reply.started": "2021-10-05T18:25:03.138767Z"
    }
   },
   "outputs": [],
   "source": [
    "from deeptlf import DeepTFL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "voluntary-joshua",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:09.173982Z",
     "iopub.status.busy": "2021-10-05T18:25:09.173593Z",
     "iopub.status.idle": "2021-10-05T18:25:09.211240Z",
     "shell.execute_reply": "2021-10-05T18:25:09.210680Z",
     "shell.execute_reply.started": "2021-10-05T18:25:09.173949Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[0;31mInit signature:\u001b[0m\n",
       "\u001b[0mDeepTFL\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mn_est\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m23\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mmax_depth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mdrop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.23\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mxgb_lr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mbatchsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m320\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mnn_lr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0001\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mnn1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m384\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mnn2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m128\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mnn3\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m64\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mnn4\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mtask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mdebug\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mDocstring:\u001b[0m     \n",
       "Base class for all estimators in scikit-learn.\n",
       "\n",
       "Notes\n",
       "-----\n",
       "All estimators should specify all the parameters that can be set\n",
       "at the class level in their ``__init__`` as explicit keyword\n",
       "arguments (no ``*args`` or ``**kwargs``).\n",
       "\u001b[0;31mFile:\u001b[0m           ~/Documents/Projects/2021/DeepTLF/to_submit/new/deeptlf.py\n",
       "\u001b[0;31mType:\u001b[0m           type\n",
       "\u001b[0;31mSubclasses:\u001b[0m     \n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "DeepTFL?"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sensitive-complex",
   "metadata": {},
   "source": [
    "# Load and split data set \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "legal-timing",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:10.055966Z",
     "iopub.status.busy": "2021-10-05T18:25:10.055588Z",
     "iopub.status.idle": "2021-10-05T18:25:10.071346Z",
     "shell.execute_reply": "2021-10-05T18:25:10.070907Z",
     "shell.execute_reply.started": "2021-10-05T18:25:10.055931Z"
    }
   },
   "outputs": [],
   "source": [
    "X, y = load_breast_cancer(return_X_y=True)\n",
    "# split data \n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
    "                                                    test_size=0.1,\n",
    "                                                    random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-guest",
   "metadata": {},
   "source": [
    "# DeepTLF model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "offensive-defense",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-10-05T18:25:11.543093Z",
     "iopub.status.busy": "2021-10-05T18:25:11.542729Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[20:25:11] WARNING: ../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/john/anaconda3/lib/python3.7/site-packages/xgboost/sklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(512, 123)\n"
     ]
    }
   ],
   "source": [
    "model = DeepTFL(task='class')\n",
    "model.fit(X_train,y_train)\n",
    "\n",
    "y_hat = model.predict(X_test)\n",
    "\n",
    "y_hat = np.array(y_hat)\n",
    "y_hat[y_hat>0.5] = 1\n",
    "y_hat[y_hat<0.5] = 0\n",
    "\n",
    "acc_DeepTLF = accuracy_score(y_test, y_hat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "changing-lawrence",
   "metadata": {},
   "source": [
    "# GBDT model \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "viral-shanghai",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[20:08:44] WARNING: ../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/john/anaconda3/lib/python3.7/site-packages/xgboost/sklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n"
     ]
    }
   ],
   "source": [
    "xgb_model = xgb.XGBClassifier()\n",
    "xgb_model.fit(X_train,y_train)\n",
    "xgb_y_hat = xgb_model.predict(X_test)\n",
    "\n",
    "xgb_y_hat[xgb_y_hat>0.5] = 1\n",
    "xgb_y_hat[xgb_y_hat<0.5] = 0\n",
    "acc_GBDT = accuracy_score(y_test, xgb_y_hat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "simple-group",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "sweet-danger",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results:\n",
      "GBDT accuracy: \t\t 96.49122807017544 %\n",
      "DeepTLF accuracy:\t 98.24561403508771 %\n"
     ]
    }
   ],
   "source": [
    "print('Results:')\n",
    "print('GBDT accuracy: \\t\\t', acc_GBDT*100,'%')\n",
    "print('DeepTLF accuracy:\\t', acc_DeepTLF*100,'%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "functional-authority",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
