{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastai.tabular.all import *\n",
    "\n",
    "#missing data infill is handled by Automunge (adjinfill is assigned in the automunge calls below)\n",
    "#note: exc2 (a pass-through transform that defaults to mode infill) is not assigned anywhere in this notebook\n",
    "\n",
    "from Automunge import Automunger\n",
    "am = Automunger.AutoMunge()\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# training length passed to fit_one_cycle -- other settings tried: 3, 28, 35, 42, 200\n",
    "number_of_epochs = 14\n",
    "\n",
    "# sample_ratio     -> frac handed to DataFrame.sample when each trial draws its rows\n",
    "# validation_ratio -> valid_pct handed to RandomSplitter when the processed rows are split\n",
    "#\n",
    "# preset         sample_ratio         validation_ratio\n",
    "# full DATA      1.0                  0.0567\n",
    "# 5% DATA        0.103833244329036    0.545745953948647    <- active\n",
    "# 0.25% DATA     0.05902490655        0.9600451114\n",
    "# tiny data      0.0001               0.5\n",
    "sample_ratio = 0.103833244329036\n",
    "validation_ratio = 0.545745953948647"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the csv is read with header=None, so pandas labels the columns with integers starting at 0\n",
    "path = \"/data/Benchmark_datasets/Higgs/HIGGS.csv\"\n",
    "df_train1 = pd.read_csv(filepath_or_buffer=path, header=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Augmentation Experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "randomstate = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Builds the augmented training frame for this trial:\n",
    "#  pass 1 processes the sampled rows under root category 'retn' (no noise injection)\n",
    "#  pass 2 processes the same sampled rows under 'DPrt' (noise injection, flip_prob = 0.03)\n",
    "#the two passes are then stacked row-wise into train\n",
    "\n",
    "#First we'll process without noise injection\n",
    "\n",
    "#feature columns are '1'..'28' and the label is column '0'\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "label_names = '0'\n",
    "\n",
    "assigncat = {'retn':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train, trainID, labels,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#feature / label headers of the noise-free pass; these are what the learner cell consumes\n",
    "cont_names_final = list(train)\n",
    "y_names = list(labels)[0]\n",
    "train_headers = finalcolumns_train\n",
    "\n",
    "\n",
    "#now we'll apply noise injection and concatenate results\n",
    "\n",
    "#rebuilt so that no list object is shared with the first automunge call\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "\n",
    "assigncat = {'DPrt':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'default_assignparam' : {'DPrt' : {'flip_prob' : 0.03}},\n",
    "               'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "#same frac and random_state as the first draw, so the same rows feed the noise pass\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train2, trainID, labels2,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#we want consistent column header names\n",
    "#train2 is relabelled by position, so first check that both passes returned the source columns in the same order\n",
    "#assumes returned headers have the form '<source column>_<suffix>' -- TODO confirm for the Automunge version in use\n",
    "clean_order = [header.split('_')[0] for header in train_headers]\n",
    "noisy_order = [header.split('_')[0] for header in list(train2)]\n",
    "assert clean_order == noisy_order, 'column order differs between the retn and DPrt passes'\n",
    "train2.columns = train_headers\n",
    "\n",
    "#attach the labels to each pass, then stack the noise-free and noise-injected rows\n",
    "train = pd.concat([train, labels], axis = 1)\n",
    "train2 = pd.concat([train2, labels2], axis = 1)\n",
    "\n",
    "train = pd.concat([train, train2], axis = 0, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>roc_auc_score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.552340</td>\n",
       "      <td>0.538891</td>\n",
       "      <td>0.800981</td>\n",
       "      <td>03:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.536620</td>\n",
       "      <td>0.521141</td>\n",
       "      <td>0.816937</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.525746</td>\n",
       "      <td>0.512494</td>\n",
       "      <td>0.824680</td>\n",
       "      <td>03:04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.515383</td>\n",
       "      <td>0.506518</td>\n",
       "      <td>0.829423</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.521961</td>\n",
       "      <td>0.502119</td>\n",
       "      <td>0.831946</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.506850</td>\n",
       "      <td>0.496301</td>\n",
       "      <td>0.836542</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.521452</td>\n",
       "      <td>0.495257</td>\n",
       "      <td>0.837431</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.501407</td>\n",
       "      <td>0.490071</td>\n",
       "      <td>0.840733</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.491337</td>\n",
       "      <td>0.486088</td>\n",
       "      <td>0.843922</td>\n",
       "      <td>03:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.481345</td>\n",
       "      <td>0.482306</td>\n",
       "      <td>0.847351</td>\n",
       "      <td>03:09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.489115</td>\n",
       "      <td>0.481538</td>\n",
       "      <td>0.846810</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.475397</td>\n",
       "      <td>0.476509</td>\n",
       "      <td>0.850187</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.476297</td>\n",
       "      <td>0.473758</td>\n",
       "      <td>0.852117</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.481291</td>\n",
       "      <td>0.472399</td>\n",
       "      <td>0.853129</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#rev1\n",
    "\n",
    "#the split is seeded with this trial's randomstate so the train / validation partition is repeatable\n",
    "#NOTE(review): train stacks a noise-free and a noise-injected copy of each sampled row, so a row-level\n",
    "#random split can put the two copies of one source row on opposite sides -- confirm this is intended\n",
    "splits = RandomSplitter(valid_pct=validation_ratio, seed=randomstate)(range_of(train))\n",
    "\n",
    "#procs is left empty: the features were already prepared by Automunge in the preceding cell\n",
    "to = TabularPandas(train, procs=[],\n",
    "                   cont_names = cont_names_final,\n",
    "                   y_names = y_names,\n",
    "                   splits = splits)\n",
    "\n",
    "dls = to.dataloaders(bs=64)\n",
    "\n",
    "#layers holds five entries of 300; the reported metric is binary ROC AUC\n",
    "auc_metric = RocAucBinary()\n",
    "learn = tabular_learner(dls, layers=[300] * 5, metrics=auc_metric)\n",
    "\n",
    "learn.fit_one_cycle(number_of_epochs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Augmentation rev2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "randomstate = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Builds the augmented training frame for this trial:\n",
    "#  pass 1 processes the sampled rows under root category 'retn' (no noise injection)\n",
    "#  pass 2 processes the same sampled rows under 'DPrt' (noise injection, flip_prob = 0.03)\n",
    "#the two passes are then stacked row-wise into train\n",
    "\n",
    "#First we'll process without noise injection\n",
    "\n",
    "#feature columns are '1'..'28' and the label is column '0'\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "label_names = '0'\n",
    "\n",
    "assigncat = {'retn':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train, trainID, labels,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#feature / label headers of the noise-free pass; these are what the learner cell consumes\n",
    "cont_names_final = list(train)\n",
    "y_names = list(labels)[0]\n",
    "train_headers = finalcolumns_train\n",
    "\n",
    "\n",
    "#now we'll apply noise injection and concatenate results\n",
    "\n",
    "#rebuilt so that no list object is shared with the first automunge call\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "\n",
    "assigncat = {'DPrt':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'default_assignparam' : {'DPrt' : {'flip_prob' : 0.03}},\n",
    "               'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "#same frac and random_state as the first draw, so the same rows feed the noise pass\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train2, trainID, labels2,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#we want consistent column header names\n",
    "#train2 is relabelled by position, so first check that both passes returned the source columns in the same order\n",
    "#assumes returned headers have the form '<source column>_<suffix>' -- TODO confirm for the Automunge version in use\n",
    "clean_order = [header.split('_')[0] for header in train_headers]\n",
    "noisy_order = [header.split('_')[0] for header in list(train2)]\n",
    "assert clean_order == noisy_order, 'column order differs between the retn and DPrt passes'\n",
    "train2.columns = train_headers\n",
    "\n",
    "#attach the labels to each pass, then stack the noise-free and noise-injected rows\n",
    "train = pd.concat([train, labels], axis = 1)\n",
    "train2 = pd.concat([train2, labels2], axis = 1)\n",
    "\n",
    "train = pd.concat([train, train2], axis = 0, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>roc_auc_score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.549884</td>\n",
       "      <td>0.537744</td>\n",
       "      <td>0.801733</td>\n",
       "      <td>03:02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.530816</td>\n",
       "      <td>0.518143</td>\n",
       "      <td>0.819475</td>\n",
       "      <td>03:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.521803</td>\n",
       "      <td>0.514726</td>\n",
       "      <td>0.821158</td>\n",
       "      <td>03:02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.508796</td>\n",
       "      <td>0.503027</td>\n",
       "      <td>0.831129</td>\n",
       "      <td>03:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.510872</td>\n",
       "      <td>0.499651</td>\n",
       "      <td>0.833825</td>\n",
       "      <td>03:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.511891</td>\n",
       "      <td>0.497274</td>\n",
       "      <td>0.835074</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.502657</td>\n",
       "      <td>0.495376</td>\n",
       "      <td>0.836398</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.498406</td>\n",
       "      <td>0.489450</td>\n",
       "      <td>0.841199</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.496770</td>\n",
       "      <td>0.485792</td>\n",
       "      <td>0.843336</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.489748</td>\n",
       "      <td>0.481692</td>\n",
       "      <td>0.846289</td>\n",
       "      <td>03:04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.481977</td>\n",
       "      <td>0.482148</td>\n",
       "      <td>0.846272</td>\n",
       "      <td>03:04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.488649</td>\n",
       "      <td>0.476639</td>\n",
       "      <td>0.850077</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.476893</td>\n",
       "      <td>0.475579</td>\n",
       "      <td>0.851220</td>\n",
       "      <td>03:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.471256</td>\n",
       "      <td>0.475535</td>\n",
       "      <td>0.851143</td>\n",
       "      <td>03:10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#rev2\n",
    "\n",
    "#the split is seeded with this trial's randomstate so the train / validation partition is repeatable\n",
    "#NOTE(review): train stacks a noise-free and a noise-injected copy of each sampled row, so a row-level\n",
    "#random split can put the two copies of one source row on opposite sides -- confirm this is intended\n",
    "splits = RandomSplitter(valid_pct=validation_ratio, seed=randomstate)(range_of(train))\n",
    "\n",
    "#procs is left empty: the features were already prepared by Automunge in the preceding cell\n",
    "to = TabularPandas(train, procs=[],\n",
    "                   cont_names = cont_names_final,\n",
    "                   y_names = y_names,\n",
    "                   splits = splits)\n",
    "\n",
    "dls = to.dataloaders(bs=64)\n",
    "\n",
    "#layers holds five entries of 300; the reported metric is binary ROC AUC\n",
    "auc_metric = RocAucBinary()\n",
    "learn = tabular_learner(dls, layers=[300] * 5, metrics=auc_metric)\n",
    "\n",
    "learn.fit_one_cycle(number_of_epochs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Augmentation rev3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "randomstate = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Builds the augmented training frame for this trial:\n",
    "#  pass 1 processes the sampled rows under root category 'retn' (no noise injection)\n",
    "#  pass 2 processes the same sampled rows under 'DPrt' (noise injection, flip_prob = 0.03)\n",
    "#the two passes are then stacked row-wise into train\n",
    "\n",
    "#First we'll process without noise injection\n",
    "\n",
    "#feature columns are '1'..'28' and the label is column '0'\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "label_names = '0'\n",
    "\n",
    "assigncat = {'retn':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train, trainID, labels,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#feature / label headers of the noise-free pass; these are what the learner cell consumes\n",
    "cont_names_final = list(train)\n",
    "y_names = list(labels)[0]\n",
    "train_headers = finalcolumns_train\n",
    "\n",
    "\n",
    "#now we'll apply noise injection and concatenate results\n",
    "\n",
    "#rebuilt so that no list object is shared with the first automunge call\n",
    "cont_names = [str(i) for i in range(1, 29)]\n",
    "\n",
    "assigncat = {'DPrt':cont_names, 'lbos':label_names}\n",
    "#turning off inplace as expect the order of columns consistency with DPrt will be impacted by inplace\n",
    "assignparam = {'default_assignparam' : {'DPrt' : {'flip_prob' : 0.03}},\n",
    "               'global_assignparam' : {'inplace' : False}}\n",
    "\n",
    "#same frac and random_state as the first draw, so the same rows feed the noise pass\n",
    "df_train = df_train1.sample(frac = sample_ratio, random_state = randomstate)\n",
    "\n",
    "(train2, trainID, labels2,\n",
    " validation1, validationID1, validationlabels1,\n",
    " validation2, validationID2, validationlabels2,\n",
    " test, testID, testlabels,\n",
    " labelsencoding_dict, finalcolumns_train, finalcolumns_test,\n",
    " featureimportance, postprocess_dict) = am.automunge(\n",
    "    df_train, labels_column = label_names, MLinfill = False,\n",
    "    assigncat = assigncat,\n",
    "    assigninfill = {'adjinfill':cont_names},\n",
    "    assignparam = assignparam,\n",
    "    pandasoutput = True, printstatus = False)\n",
    "\n",
    "#we want consistent column header names\n",
    "#train2 is relabelled by position, so first check that both passes returned the source columns in the same order\n",
    "#assumes returned headers have the form '<source column>_<suffix>' -- TODO confirm for the Automunge version in use\n",
    "clean_order = [header.split('_')[0] for header in train_headers]\n",
    "noisy_order = [header.split('_')[0] for header in list(train2)]\n",
    "assert clean_order == noisy_order, 'column order differs between the retn and DPrt passes'\n",
    "train2.columns = train_headers\n",
    "\n",
    "#attach the labels to each pass, then stack the noise-free and noise-injected rows\n",
    "train = pd.concat([train, labels], axis = 1)\n",
    "train2 = pd.concat([train2, labels2], axis = 1)\n",
    "\n",
    "train = pd.concat([train, train2], axis = 0, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: left;\">\n",
       "      <th>epoch</th>\n",
       "      <th>train_loss</th>\n",
       "      <th>valid_loss</th>\n",
       "      <th>roc_auc_score</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.549151</td>\n",
       "      <td>0.535055</td>\n",
       "      <td>0.804378</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.545248</td>\n",
       "      <td>0.522088</td>\n",
       "      <td>0.817113</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.521739</td>\n",
       "      <td>0.511635</td>\n",
       "      <td>0.825029</td>\n",
       "      <td>03:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.519124</td>\n",
       "      <td>0.506295</td>\n",
       "      <td>0.828270</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.501218</td>\n",
       "      <td>0.503309</td>\n",
       "      <td>0.832078</td>\n",
       "      <td>03:03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.508827</td>\n",
       "      <td>0.504454</td>\n",
       "      <td>0.829840</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.507468</td>\n",
       "      <td>0.490709</td>\n",
       "      <td>0.840530</td>\n",
       "      <td>03:03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>0.491551</td>\n",
       "      <td>0.490298</td>\n",
       "      <td>0.841028</td>\n",
       "      <td>03:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>0.499404</td>\n",
       "      <td>0.485925</td>\n",
       "      <td>0.843806</td>\n",
       "      <td>03:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>0.490729</td>\n",
       "      <td>0.483497</td>\n",
       "      <td>0.845717</td>\n",
       "      <td>03:02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>0.487910</td>\n",
       "      <td>0.478820</td>\n",
       "      <td>0.848748</td>\n",
       "      <td>03:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>0.476519</td>\n",
       "      <td>0.475133</td>\n",
       "      <td>0.851300</td>\n",
       "      <td>03:04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>0.468060</td>\n",
       "      <td>0.475040</td>\n",
       "      <td>0.851438</td>\n",
       "      <td>03:04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>0.481625</td>\n",
       "      <td>0.473716</td>\n",
       "      <td>0.852206</td>\n",
       "      <td>03:03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#rev3\n",
    "\n",
    "#the split is seeded with this trial's randomstate so the train / validation partition is repeatable\n",
    "#NOTE(review): train stacks a noise-free and a noise-injected copy of each sampled row, so a row-level\n",
    "#random split can put the two copies of one source row on opposite sides -- confirm this is intended\n",
    "splits = RandomSplitter(valid_pct=validation_ratio, seed=randomstate)(range_of(train))\n",
    "\n",
    "#procs is left empty: the features were already prepared by Automunge in the preceding cell\n",
    "to = TabularPandas(train, procs=[],\n",
    "                   cont_names = cont_names_final,\n",
    "                   y_names = y_names,\n",
    "                   splits = splits)\n",
    "\n",
    "dls = to.dataloaders(bs=64)\n",
    "\n",
    "#layers holds five entries of 300; the reported metric is binary ROC AUC\n",
    "auc_metric = RocAucBinary()\n",
    "learn = tabular_learner(dls, layers=[300] * 5, metrics=auc_metric)\n",
    "\n",
    "learn.fit_one_cycle(number_of_epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
