{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experiments Pipeline Demonstration"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook first assembles an \"experiments_dict\" describing all variations of properties for each numbered experiment scenario. The training pipeline saves externally the cumulative experiments_dict populated with a log of performance metrics under repetition upon completion of each experiment scenario. \n",
    "\n",
    "A spreadsheet \"04 - Experiment scenarios key.xlsx\" is provided seperately which may serve as a key between the numbered experiment scenarios and associated property variations.\n",
    "\n",
    "The populated experiments_dict entries are then aggregated to generate the Figures in a seperate notebook \"5 - Experiments - generation of plots for figures\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from Automunge import *\n",
    "am = AutoMunge()\n",
    "\n",
    "from catboost import CatBoostClassifier\n",
    "from catboost import CatBoostRegressor\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "import datetime as dt\n",
    "from copy import deepcopy\n",
    "import pickle\n",
    "import statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "notebook_number = 0\n",
    "# notebook_number = 2\n",
    "# notebook_number = 2\n",
    "# notebook_number = 3\n",
    "# notebook_number = 4\n",
    "# notebook_number = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "assigninfill_scenarios = \\\n",
    "[\n",
    "  'stdrdinfill', \n",
    "  'adjinfill', \n",
    "  'modeinfill', \n",
    "  'MLinfill',\n",
    "]\n",
    "\n",
    "ML_cmnd_scenarios = \\\n",
    "[{'ML_cmnd'     : {'autoML_type':'randomforest'}},\n",
    " {'ML_cmnd'     : {'autoML_type':'catboost'}},\n",
    " {'ML_cmnd'     : {'autoML_type':'autogluon'}},\n",
    "]\n",
    "\n",
    "assignnan_missingatrandom_scenarios = \\\n",
    "[{'inject_ratio' : 0},\n",
    " {'inject_ratio' : 0.1},\n",
    " {'inject_ratio' : 0.33},\n",
    " {'inject_ratio' : 0.66},\n",
    " {'inject_ratio' : 1.},\n",
    "]\n",
    "\n",
    "target_missingatrandom_columns = \\\n",
    "['numeric_column',\n",
    " 'categoric_column',\n",
    "]\n",
    "\n",
    "minmax_range_notatrandom_numericcolumn = \\\n",
    "[\n",
    "  [0, 0.33],\n",
    "  [0.33, 0.67],\n",
    "  [0.67, 1.0],\n",
    "]\n",
    "\n",
    "# entry_ratio_notatrandom_categoriccolumn = \\\n",
    "# [\n",
    "#   {'topentry' : 0.},\n",
    "#   {'topentry' : 0.33},\n",
    "#   {'topentry' : 0.66},\n",
    "#   {'topentry' : 1.},\n",
    "# ]\n",
    "\n",
    "NArw_marker_scenario = \\\n",
    "[False, True]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "Datasets = \\\n",
    "[\n",
    "  'Boston_Housing',\n",
    "  'AllstateClaimsSeverity',\n",
    "  'IEEE-CIS',\n",
    "]\n",
    "\n",
    "\n",
    "Top_fifteen_Housing = \\\n",
    "['OverallQual',\n",
    " 'GrLivArea',\n",
    " 'TotalBsmtSF',\n",
    " 'BsmtFinSF1',\n",
    " 'OverallCond',\n",
    " 'YearBuilt',\n",
    " 'LotArea',\n",
    " 'GarageCars',\n",
    " 'GarageType',\n",
    " '2ndFlrSF',\n",
    " '1stFlrSF',\n",
    " 'GarageArea',\n",
    " 'YearRemodAdd',\n",
    " 'OpenPorchSF',\n",
    " 'BsmtUnfSF',\n",
    "]\n",
    "\n",
    "Housing_numeric_target   = 'GrLivArea'\n",
    "Housing_categoric_target = 'OverallQual'\n",
    "\n",
    "Housing_numeric_bin_ranking = \\\n",
    "['GrLivArea_tlbn_8',\n",
    " 'GrLivArea_tlbn_0',\n",
    " 'GrLivArea_tlbn_6',\n",
    " 'GrLivArea_tlbn_5',\n",
    " 'GrLivArea_tlbn_4',\n",
    " 'GrLivArea_tlbn_7',\n",
    " 'GrLivArea_tlbn_3',\n",
    " 'GrLivArea_tlbn_2',\n",
    " 'GrLivArea_tlbn_1']\n",
    "\n",
    "Housing_categoric_entry_ranking = \\\n",
    "['OverallQual_7.0',\n",
    " 'OverallQual_8.0',\n",
    " 'OverallQual_5.0',\n",
    " 'OverallQual_4.0',\n",
    " 'OverallQual_6.0',\n",
    " 'OverallQual_9.0',\n",
    " 'OverallQual_3.0',\n",
    " 'OverallQual_10.0',\n",
    " 'OverallQual_2.0',\n",
    " 'OverallQual_1.0']\n",
    "\n",
    "\n",
    "Top_fifteen_Allstate = \\\n",
    "['cat80',\n",
    " 'cat12',\n",
    " 'cont7',\n",
    " 'cat101',\n",
    " 'cat100',\n",
    " 'cat79',\n",
    " 'cont2',\n",
    " 'cat81',\n",
    " 'cont14',\n",
    " 'cat57',\n",
    " 'cat53',\n",
    " 'cat2',\n",
    " 'cat72',\n",
    " 'cat111',\n",
    " 'cont12']\n",
    "\n",
    "Allstate_numeric_target   = 'cont7'\n",
    "Allstate_categoric_target = 'cat80'\n",
    "\n",
    "Allstate_numeric_bin_ranking = \\\n",
    "['cont7_tlbn_8',\n",
    " 'cont7_tlbn_7',\n",
    " 'cont7_tlbn_6',\n",
    " 'cont7_tlbn_0',\n",
    " 'cont7_tlbn_5',\n",
    " 'cont7_tlbn_2',\n",
    " 'cont7_tlbn_3',\n",
    " 'cont7_tlbn_4',\n",
    " 'cont7_tlbn_1']\n",
    "\n",
    "Allstate_categoric_entry_ranking = \\\n",
    "['cat80_B', 'cat80_D', 'cat80_C', 'cat80_A']\n",
    "\n",
    "Allstate_metric2_key_categoric = \\\n",
    "{0.018307124232432304: ['cat80_B'],\n",
    " 0.08329083961923034: ['cat80_D'],\n",
    " 0.09096580710519919: ['cat80_C'],\n",
    " 0.09403209895288578: ['cat80_A']}\n",
    "\n",
    "Allstate_metric2_key_numeric = \\\n",
    "{0.00756453760699094: ['cont7_tlbn_8'],\n",
    " 0.022622084019138344: ['cont7_tlbn_7'],\n",
    " 0.02600484958623228: ['cont7_tlbn_6'],\n",
    " 0.026589563628192647: ['cont7_tlbn_0'],\n",
    " 0.026828957070519688: ['cont7_tlbn_5'],\n",
    " 0.028168605903439725: ['cont7_tlbn_2'],\n",
    " 0.02822993233999005: ['cont7_tlbn_3'],\n",
    " 0.028499276919530803: ['cont7_tlbn_4'],\n",
    " 0.02901271933636529: ['cont7_tlbn_1']}\n",
    "\n",
    "#the IEEE scenarios were not conducted due to time constraints\n",
    "\n",
    "Top_fifteen_IEEE = \\\n",
    "['card6',\n",
    " 'C13',\n",
    " 'C14',\n",
    " 'C1',\n",
    " 'V283',\n",
    " 'V294',\n",
    " 'P_emaildomain',\n",
    " 'card2',\n",
    " 'V317',\n",
    " 'TransactionAmt',\n",
    " 'card1',\n",
    " 'D15',\n",
    " 'C11',\n",
    " 'D2',\n",
    " 'V315']\n",
    "\n",
    "IEEE_numeric_target   = 'TransactionAmt'\n",
    "IEEE_categoric_target = 'card6'\n",
    "\n",
    "IEEE_numeric_bin_ranking = \\\n",
    "['TransactionAmt_tlbn_8',\n",
    " 'TransactionAmt_tlbn_0',\n",
    " 'TransactionAmt_tlbn_7',\n",
    " 'TransactionAmt_tlbn_5',\n",
    " 'TransactionAmt_tlbn_1',\n",
    " 'TransactionAmt_tlbn_3',\n",
    " 'TransactionAmt_tlbn_2',\n",
    " 'TransactionAmt_tlbn_4',\n",
    " 'TransactionAmt_tlbn_6']\n",
    "\n",
    "IEEE_categoric_entry_ranking = \\\n",
    "['card6_credit', 'card6_debit', 'card6_charge card']\n",
    "\n",
    "IEEE_metric2_key_categoric = \\\n",
    "{0.001312358180648121: ['card6_credit'],\n",
    " 0.0022183086666440754: ['card6_debit'],\n",
    " 0.003776204829478047: ['card6_charge card', 'card6_debit or credit']}\n",
    "\n",
    "IEEE_metric2_key_numeric = \\\n",
    "{0.002294510109391301: ['TransactionAmt_tlbn_8'],\n",
    " 0.0025231144376333114: ['TransactionAmt_tlbn_0'],\n",
    " 0.0031581264605275994: ['TransactionAmt_tlbn_7'],\n",
    " 0.003166593287499575: ['TransactionAmt_tlbn_5'],\n",
    " 0.0032258610763029605: ['TransactionAmt_tlbn_1'],\n",
    " 0.003259728384190641: ['TransactionAmt_tlbn_3'],\n",
    " 0.0032681952111626167: ['TransactionAmt_tlbn_2'],\n",
    " 0.0033020625190502972: ['TransactionAmt_tlbn_4'],\n",
    " 0.0033697971348256583: ['TransactionAmt_tlbn_6']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#experiments_dict is a numbered aggregation of each experiment scenario\n",
    "#for a summary of various scenarios with associated scenario number\n",
    "#plase see seperate file \"04 - Experiment scenarios key.xlsx\"\n",
    "\n",
    "experiments_dict = {}\n",
    "\n",
    "i = notebook_number * 480\n",
    "\n",
    "for NArw_marker in NArw_marker_scenario:\n",
    "  \n",
    "  for dataset in Datasets:\n",
    "\n",
    "    if dataset == 'Boston_Housing':\n",
    "\n",
    "#       path = 'E:/Benchmark_datasets/Housing/housing_train.csv'\n",
    "      path = 'housing_train.csv'\n",
    "\n",
    "      numeric_target = Housing_numeric_target\n",
    "      categoric_target = Housing_categoric_target\n",
    "\n",
    "      top_numeric_bin = 9\n",
    "      top_categoric_bin = 7\n",
    "\n",
    "      topfifteen = Top_fifteen_Housing\n",
    "\n",
    "      labels_column = 'SalePrice'\n",
    "      trainID_column = 'Id'\n",
    "\n",
    "      modeltype = 'regression'\n",
    "\n",
    "    if dataset == 'AllstateClaimsSeverity':\n",
    "\n",
    "#       path = 'E:/Benchmark_datasets/AllstateClaimsSeverity/train.csv'\n",
    "      path = 'allstate_train.csv'\n",
    "\n",
    "      numeric_target = Allstate_numeric_target\n",
    "      categoric_target = Allstate_categoric_target\n",
    "\n",
    "      top_numeric_bin = 9\n",
    "      top_categoric_bin = 'cat80_B'\n",
    "\n",
    "      topfifteen = Top_fifteen_Allstate\n",
    "\n",
    "      labels_column = 'loss'\n",
    "      trainID_column = 'id'\n",
    "\n",
    "      modeltype = 'regression'\n",
    "\n",
    "    if dataset == 'IEEE-CIS':\n",
    "\n",
    "      path = 'E:/Benchmark_datasets/IEEE-CISFraudDetection/train_transaction.csv'\n",
    "\n",
    "      numeric_target = IEEE_numeric_target\n",
    "      categoric_target = IEEE_categoric_target\n",
    "\n",
    "      top_numeric_bin = 9\n",
    "      top_categoric_bin = 'card6_credit'\n",
    "\n",
    "      topfifteen = Top_fifteen_IEEE\n",
    "\n",
    "      labels_column = 'isFraud'\n",
    "  #     trainID_column = False\n",
    "      trainID_column = \"TransactionID\"\n",
    "\n",
    "      modeltype = 'classification'\n",
    "    \n",
    "    for assigninfill_scenario in assigninfill_scenarios:\n",
    "\n",
    "      for targetcolumn in [numeric_target, categoric_target]:\n",
    "\n",
    "        for nantype in ['missingatrandom', 'categoric', 'numeric']:\n",
    "\n",
    "          if assigninfill_scenario != 'MLinfill':\n",
    "            ml_cmnd = {}\n",
    "\n",
    "          elif assigninfill_scenario == 'MLinfill':\n",
    "            ml_cmnd = {'autoML_type':'catboost'}\n",
    "\n",
    "          assigninfill = {assigninfill_scenario : targetcolumn}\n",
    "\n",
    "          for injectratio in assignnan_missingatrandom_scenarios:\n",
    "\n",
    "            if nantype == 'missingatrandom':\n",
    "              assignnan = {'injections' : {targetcolumn : injectratio}}\n",
    "\n",
    "            if nantype == 'categoric':\n",
    "              if targetcolumn == categoric_target:\n",
    "                assignnan = {'injections' : {targetcolumn : {'entry_ratio' : {top_categoric_bin : injectratio['inject_ratio']}}}}\n",
    "              else:\n",
    "                assignnan = False\n",
    "\n",
    "            if nantype == 'numeric':\n",
    "              if targetcolumn == numeric_target:\n",
    "                assignnan = {'injections' : {targetcolumn : {'minmax_range' : {'ratio'  : injectratio['inject_ratio'], \\\n",
    "                                                                               'ranges' : [[1/9*(top_numeric_bin-1), 1/9*(top_numeric_bin)]]}}}}\n",
    "              else:\n",
    "                assignnan = False\n",
    "\n",
    "            if assignnan is not False:\n",
    "\n",
    "              experiments_dict.update(\n",
    "              {i : {\n",
    "                'NArw_marker'  : NArw_marker,\n",
    "                'dataset'      : dataset,\n",
    "                'path'         : path,\n",
    "                'modeltype'    : modeltype,\n",
    "                'topfifteen'   : topfifteen,\n",
    "                'labels_column': labels_column,\n",
    "                'trainID_column': trainID_column,\n",
    "                'targetcolumn' : targetcolumn,\n",
    "                'assigninfill_scenario' : assigninfill_scenario,\n",
    "                'nantype'      : nantype,\n",
    "                'injectratio'  : injectratio['inject_ratio'],\n",
    "                'ML_cmnd'      : ml_cmnd,\n",
    "                'assignnan'    : assignnan,\n",
    "                'assigninfill' : assigninfill,\n",
    "              }})\n",
    "\n",
    "              i+=1\n",
    "            \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_______________\n",
      "\n",
      "experiment:  70\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'inject_ratio': 0}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   25303.80283291794\n",
      "\n",
      "repetition number:  1\n",
      "score =   25231.313300102604\n",
      "\n",
      "repetition number:  2\n",
      "score =   34408.8233657513\n",
      "\n",
      "repetition number:  3\n",
      "score =   31020.98580027178\n",
      "\n",
      "repetition number:  4\n",
      "score =   27274.04134110233\n",
      "\n",
      "repetition number:  5\n",
      "score =   29186.28026477407\n",
      "\n",
      "repetition number:  6\n",
      "score =   23784.70425667061\n",
      "\n",
      "repetition number:  7\n",
      "score =   26426.83100551273\n",
      "\n",
      "repetition number:  8\n",
      "score =   23446.85757181771\n",
      "\n",
      "repetition number:  9\n",
      "score =   24485.030671937075\n",
      "\n",
      "repetition number:  10\n",
      "score =   29739.745818439344\n",
      "\n",
      "repetition number:  11\n",
      "score =   24184.442476168828\n",
      "\n",
      "repetition number:  12\n",
      "score =   27416.46196476224\n",
      "\n",
      "repetition number:  13\n",
      "score =   26513.96999856017\n",
      "\n",
      "repetition number:  14\n",
      "score =   29334.66955769958\n",
      "\n",
      "repetition number:  15\n",
      "score =   24887.04708720129\n",
      "\n",
      "repetition number:  16\n",
      "score =   28420.545621672707\n",
      "\n",
      "repetition number:  17\n",
      "score =   26869.591406783784\n",
      "\n",
      "repetition number:  18\n",
      "score =   24423.76087323993\n",
      "\n",
      "repetition number:  19\n",
      "score =   29568.888033155934\n",
      "\n",
      "repetition number:  20\n",
      "score =   24826.66003360557\n",
      "\n",
      "repetition number:  21\n",
      "score =   23168.309481769662\n",
      "\n",
      "repetition number:  22\n",
      "score =   29384.925700371623\n",
      "\n",
      "repetition number:  23\n",
      "score =   26924.22622749107\n",
      "\n",
      "repetition number:  24\n",
      "score =   29054.94973799986\n",
      "\n",
      "scoremean =  27011.474577191188\n",
      "scorestdev =  2752.9062382295706\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  71\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'inject_ratio': 0.1}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   24529.07656590942\n",
      "\n",
      "repetition number:  1\n",
      "score =   36352.172199847955\n",
      "\n",
      "repetition number:  2\n",
      "score =   27291.14615925492\n",
      "\n",
      "repetition number:  3\n",
      "score =   43980.82348335001\n",
      "\n",
      "repetition number:  4\n",
      "score =   25107.47642093288\n",
      "\n",
      "repetition number:  5\n",
      "score =   25838.091107048454\n",
      "\n",
      "repetition number:  6\n",
      "score =   25835.806376554483\n",
      "\n",
      "repetition number:  7\n",
      "score =   37682.88650490659\n",
      "\n",
      "repetition number:  8\n",
      "score =   24286.21842594946\n",
      "\n",
      "repetition number:  9\n",
      "score =   34741.51025005696\n",
      "\n",
      "repetition number:  10\n",
      "score =   25929.015733828608\n",
      "\n",
      "repetition number:  11\n",
      "score =   26523.257149745605\n",
      "\n",
      "repetition number:  12\n",
      "score =   29570.684674659726\n",
      "\n",
      "repetition number:  13\n",
      "score =   24969.002769011786\n",
      "\n",
      "repetition number:  14\n",
      "score =   30478.041749678636\n",
      "\n",
      "repetition number:  15\n",
      "score =   31558.964242744136\n",
      "\n",
      "repetition number:  16\n",
      "score =   25291.66732685334\n",
      "\n",
      "repetition number:  17\n",
      "score =   26312.637185811138\n",
      "\n",
      "repetition number:  18\n",
      "score =   30119.868871579893\n",
      "\n",
      "repetition number:  19\n",
      "score =   32116.93101611523\n",
      "\n",
      "repetition number:  20\n",
      "score =   24169.617381541048\n",
      "\n",
      "repetition number:  21\n",
      "score =   32079.110884387013\n",
      "\n",
      "repetition number:  22\n",
      "score =   23822.565718352165\n",
      "\n",
      "repetition number:  23\n",
      "score =   32857.84371224189\n",
      "\n",
      "repetition number:  24\n",
      "score =   21082.69537941653\n",
      "\n",
      "scoremean =  28901.084451591116\n",
      "scorestdev =  5310.699573242324\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  72\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'inject_ratio': 0.33}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   28509.975737799883\n",
      "\n",
      "repetition number:  1\n",
      "score =   24374.763272438842\n",
      "\n",
      "repetition number:  2\n",
      "score =   32483.43197579064\n",
      "\n",
      "repetition number:  3\n",
      "score =   30807.793681550964\n",
      "\n",
      "repetition number:  4\n",
      "score =   27643.811285507112\n",
      "\n",
      "repetition number:  5\n",
      "score =   25039.81311329783\n",
      "\n",
      "repetition number:  6\n",
      "score =   29460.10971410728\n",
      "\n",
      "repetition number:  7\n",
      "score =   24060.430809189176\n",
      "\n",
      "repetition number:  8\n",
      "score =   24311.729474779113\n",
      "\n",
      "repetition number:  9\n",
      "score =   27563.51382454666\n",
      "\n",
      "repetition number:  10\n",
      "score =   33396.41028594896\n",
      "\n",
      "repetition number:  11\n",
      "score =   28925.31197804501\n",
      "\n",
      "repetition number:  12\n",
      "score =   27279.981463274074\n",
      "\n",
      "repetition number:  13\n",
      "score =   24388.941394499056\n",
      "\n",
      "repetition number:  14\n",
      "score =   25149.293908767537\n",
      "\n",
      "repetition number:  15\n",
      "score =   31760.48773082684\n",
      "\n",
      "repetition number:  16\n",
      "score =   28985.270630071926\n",
      "\n",
      "repetition number:  17\n",
      "score =   34209.78682921082\n",
      "\n",
      "repetition number:  18\n",
      "score =   28532.6811901372\n",
      "\n",
      "repetition number:  19\n",
      "score =   24278.817951142362\n",
      "\n",
      "repetition number:  20\n",
      "score =   22084.840816922308\n",
      "\n",
      "repetition number:  21\n",
      "score =   24012.85455221481\n",
      "\n",
      "repetition number:  22\n",
      "score =   33227.87081164368\n",
      "\n",
      "repetition number:  23\n",
      "score =   25186.339274611222\n",
      "\n",
      "repetition number:  24\n",
      "score =   28688.158298689614\n",
      "\n",
      "scoremean =  27774.496800200515\n",
      "scorestdev =  3455.222128663497\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  73\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'inject_ratio': 0.66}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   22792.99513696557\n",
      "\n",
      "repetition number:  1\n",
      "score =   25551.28389992356\n",
      "\n",
      "repetition number:  2\n",
      "score =   23541.267623363263\n",
      "\n",
      "repetition number:  3\n",
      "score =   24352.75725006817\n",
      "\n",
      "repetition number:  4\n",
      "score =   30414.338759327686\n",
      "\n",
      "repetition number:  5\n",
      "score =   31013.582976224676\n",
      "\n",
      "repetition number:  6\n",
      "score =   27848.544897667936\n",
      "\n",
      "repetition number:  7\n",
      "score =   27766.654892771636\n",
      "\n",
      "repetition number:  8\n",
      "score =   26998.64068890569\n",
      "\n",
      "repetition number:  9\n",
      "score =   27290.203341055494\n",
      "\n",
      "repetition number:  10\n",
      "score =   28439.984853501242\n",
      "\n",
      "repetition number:  11\n",
      "score =   27618.620808623622\n",
      "\n",
      "repetition number:  12\n",
      "score =   32982.26996884064\n",
      "\n",
      "repetition number:  13\n",
      "score =   32615.23748409893\n",
      "\n",
      "repetition number:  14\n",
      "score =   30610.89776710831\n",
      "\n",
      "repetition number:  15\n",
      "score =   24898.36545395316\n",
      "\n",
      "repetition number:  16\n",
      "score =   32020.90609920345\n",
      "\n",
      "repetition number:  17\n",
      "score =   28679.690957122562\n",
      "\n",
      "repetition number:  18\n",
      "score =   26897.13740823633\n",
      "\n",
      "repetition number:  19\n",
      "score =   28955.797545495716\n",
      "\n",
      "repetition number:  20\n",
      "score =   27945.08218000776\n",
      "\n",
      "repetition number:  21\n",
      "score =   25700.290784193647\n",
      "\n",
      "repetition number:  22\n",
      "score =   33713.566737800815\n",
      "\n",
      "repetition number:  23\n",
      "score =   32829.37086168574\n",
      "\n",
      "repetition number:  24\n",
      "score =   24987.32101191513\n",
      "\n",
      "scoremean =  28258.592375522425\n",
      "scorestdev =  3116.549651072266\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  74\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'inject_ratio': 1.0}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   32936.94206805895\n",
      "\n",
      "repetition number:  1\n",
      "score =   28424.071334211316\n",
      "\n",
      "repetition number:  2\n",
      "score =   35434.30528598555\n",
      "\n",
      "repetition number:  3\n",
      "score =   26761.072951993356\n",
      "\n",
      "repetition number:  4\n",
      "score =   31274.238814425644\n",
      "\n",
      "repetition number:  5\n",
      "score =   31120.272898490766\n",
      "\n",
      "repetition number:  6\n",
      "score =   33301.8969140642\n",
      "\n",
      "repetition number:  7\n",
      "score =   28850.59764113231\n",
      "\n",
      "repetition number:  8\n",
      "score =   24455.57128262815\n",
      "\n",
      "repetition number:  9\n",
      "score =   38588.28101435716\n",
      "\n",
      "repetition number:  10\n",
      "score =   24960.767624924858\n",
      "\n",
      "repetition number:  11\n",
      "score =   26871.039849002173\n",
      "\n",
      "repetition number:  12\n",
      "score =   26488.91688145515\n",
      "\n",
      "repetition number:  13\n",
      "score =   25926.21753542992\n",
      "\n",
      "repetition number:  14\n",
      "score =   27214.290719205375\n",
      "\n",
      "repetition number:  15\n",
      "score =   27812.825696133077\n",
      "\n",
      "repetition number:  16\n",
      "score =   24995.679195031746\n",
      "\n",
      "repetition number:  17\n",
      "score =   33036.12296707158\n",
      "\n",
      "repetition number:  18\n",
      "score =   31046.828938294828\n",
      "\n",
      "repetition number:  19\n",
      "score =   25433.879577308355\n",
      "\n",
      "repetition number:  20\n",
      "score =   25807.78086072446\n",
      "\n",
      "repetition number:  21\n",
      "score =   27545.051751538296\n",
      "\n",
      "repetition number:  22\n",
      "score =   37079.68575234045\n",
      "\n",
      "repetition number:  23\n",
      "score =   31926.14964129277\n",
      "\n",
      "repetition number:  24\n",
      "score =   27746.091895942736\n",
      "\n",
      "scoremean =  29401.543163641723\n",
      "scorestdev =  3964.1679782387605\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  75\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'entry_ratio': {7: 0}}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   24035.448228819994\n",
      "\n",
      "repetition number:  1\n",
      "score =   24062.963961291844\n",
      "\n",
      "repetition number:  2\n",
      "score =   21523.840553247795\n",
      "\n",
      "repetition number:  3\n",
      "score =   32183.506960652252\n",
      "\n",
      "repetition number:  4\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "score =   25521.92770602078\n",
      "\n",
      "repetition number:  5\n",
      "score =   23365.119199088844\n",
      "\n",
      "repetition number:  6\n",
      "score =   29952.18913587778\n",
      "\n",
      "repetition number:  7\n",
      "score =   28154.16117572973\n",
      "\n",
      "repetition number:  8\n",
      "score =   33290.50686501203\n",
      "\n",
      "repetition number:  9\n",
      "score =   27261.973171426496\n",
      "\n",
      "repetition number:  10\n",
      "score =   21308.27842921409\n",
      "\n",
      "repetition number:  11\n",
      "score =   34690.880991600854\n",
      "\n",
      "repetition number:  12\n",
      "score =   23902.671711297964\n",
      "\n",
      "repetition number:  13\n",
      "score =   24123.265264082394\n",
      "\n",
      "repetition number:  14\n",
      "score =   23174.39250742695\n",
      "\n",
      "repetition number:  15\n",
      "score =   24875.22354560946\n",
      "\n",
      "repetition number:  16\n",
      "score =   24243.165081907642\n",
      "\n",
      "repetition number:  17\n",
      "score =   26354.751654580516\n",
      "\n",
      "repetition number:  18\n",
      "score =   25674.309271456204\n",
      "\n",
      "repetition number:  19\n",
      "score =   31253.51448415184\n",
      "\n",
      "repetition number:  20\n",
      "score =   27319.556140544326\n",
      "\n",
      "repetition number:  21\n",
      "score =   24850.99555577586\n",
      "\n",
      "repetition number:  22\n",
      "score =   23609.245556439964\n",
      "\n",
      "repetition number:  23\n",
      "score =   29260.389632247578\n",
      "\n",
      "repetition number:  24\n",
      "score =   26613.698283857153\n",
      "\n",
      "scoremean =  26424.23900269442\n",
      "scorestdev =  3588.028552601468\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  76\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'entry_ratio': {7: 0.1}}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   30666.945763871056\n",
      "\n",
      "repetition number:  1\n",
      "score =   36296.07140939772\n",
      "\n",
      "repetition number:  2\n",
      "score =   26207.050211061545\n",
      "\n",
      "repetition number:  3\n",
      "score =   28377.241609650646\n",
      "\n",
      "repetition number:  4\n",
      "score =   26848.271037821898\n",
      "\n",
      "repetition number:  5\n",
      "score =   25935.36244128683\n",
      "\n",
      "repetition number:  6\n",
      "score =   36852.22449745247\n",
      "\n",
      "repetition number:  7\n",
      "score =   24473.31773465885\n",
      "\n",
      "repetition number:  8\n",
      "score =   30209.769479813767\n",
      "\n",
      "repetition number:  9\n",
      "score =   22543.727913419112\n",
      "\n",
      "repetition number:  10\n",
      "score =   31087.59977341946\n",
      "\n",
      "repetition number:  11\n",
      "score =   26447.751327722937\n",
      "\n",
      "repetition number:  12\n",
      "score =   30839.063721586186\n",
      "\n",
      "repetition number:  13\n",
      "score =   25236.728675204595\n",
      "\n",
      "repetition number:  14\n",
      "score =   27654.152047218002\n",
      "\n",
      "repetition number:  15\n",
      "score =   27960.37606645473\n",
      "\n",
      "repetition number:  16\n",
      "score =   35526.914111775026\n",
      "\n",
      "repetition number:  17\n",
      "score =   28575.72746296376\n",
      "\n",
      "repetition number:  18\n",
      "score =   28942.297189709578\n",
      "\n",
      "repetition number:  19\n",
      "score =   34593.51968072197\n",
      "\n",
      "repetition number:  20\n",
      "score =   19561.62332563057\n",
      "\n",
      "repetition number:  21\n",
      "score =   25728.47235163657\n",
      "\n",
      "repetition number:  22\n",
      "score =   29180.44467504445\n",
      "\n",
      "repetition number:  23\n",
      "score =   22728.127156753366\n",
      "\n",
      "repetition number:  24\n",
      "score =   26655.157351710714\n",
      "\n",
      "scoremean =  28365.117480639434\n",
      "scorestdev =  4298.6098020458\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  77\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'entry_ratio': {7: 0.33}}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   31548.76112699827\n",
      "\n",
      "repetition number:  1\n",
      "score =   32378.39715962263\n",
      "\n",
      "repetition number:  2\n",
      "score =   25451.952332956735\n",
      "\n",
      "repetition number:  3\n",
      "score =   34547.40153965924\n",
      "\n",
      "repetition number:  4\n",
      "score =   23527.688890959605\n",
      "\n",
      "repetition number:  5\n",
      "score =   32018.028147571287\n",
      "\n",
      "repetition number:  6\n",
      "score =   25283.37432456804\n",
      "\n",
      "repetition number:  7\n",
      "score =   37475.347412449395\n",
      "\n",
      "repetition number:  8\n",
      "score =   30591.13340804385\n",
      "\n",
      "repetition number:  9\n",
      "score =   28879.96181323537\n",
      "\n",
      "repetition number:  10\n",
      "score =   23393.996354663705\n",
      "\n",
      "repetition number:  11\n",
      "score =   29722.542423441468\n",
      "\n",
      "repetition number:  12\n",
      "score =   22081.423531306486\n",
      "\n",
      "repetition number:  13\n",
      "score =   25116.597288555706\n",
      "\n",
      "repetition number:  14\n",
      "score =   22579.985187360606\n",
      "\n",
      "repetition number:  15\n",
      "score =   25402.134062061472\n",
      "\n",
      "repetition number:  16\n",
      "score =   23164.46540544978\n",
      "\n",
      "repetition number:  17\n",
      "score =   29939.21596357665\n",
      "\n",
      "repetition number:  18\n",
      "score =   29986.631853028935\n",
      "\n",
      "repetition number:  19\n",
      "score =   35489.524250401475\n",
      "\n",
      "repetition number:  20\n",
      "score =   27375.65015487627\n",
      "\n",
      "repetition number:  21\n",
      "score =   26972.97009874068\n",
      "\n",
      "repetition number:  22\n",
      "score =   28182.78887998655\n",
      "\n",
      "repetition number:  23\n",
      "score =   33606.74351388637\n",
      "\n",
      "repetition number:  24\n",
      "score =   25359.31165058292\n",
      "\n",
      "scoremean =  28403.041070959334\n",
      "scorestdev =  4323.297441172602\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  78\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'entry_ratio': {7: 0.66}}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   27824.103041540744\n",
      "\n",
      "repetition number:  1\n",
      "score =   23786.48715801893\n",
      "\n",
      "repetition number:  2\n",
      "score =   27991.989761076777\n",
      "\n",
      "repetition number:  3\n",
      "score =   24181.34072937135\n",
      "\n",
      "repetition number:  4\n",
      "score =   24737.996885675948\n",
      "\n",
      "repetition number:  5\n",
      "score =   34724.05697595148\n",
      "\n",
      "repetition number:  6\n",
      "score =   31217.119724209795\n",
      "\n",
      "repetition number:  7\n",
      "score =   23590.753684865304\n",
      "\n",
      "repetition number:  8\n",
      "score =   28858.55784016779\n",
      "\n",
      "repetition number:  9\n",
      "score =   22769.754086286364\n",
      "\n",
      "repetition number:  10\n",
      "score =   33690.73819995645\n",
      "\n",
      "repetition number:  11\n",
      "score =   28823.772338628634\n",
      "\n",
      "repetition number:  12\n",
      "score =   23134.271649968887\n",
      "\n",
      "repetition number:  13\n",
      "score =   22191.53137671746\n",
      "\n",
      "repetition number:  14\n",
      "score =   21128.534340541122\n",
      "\n",
      "repetition number:  15\n",
      "score =   35238.52551955555\n",
      "\n",
      "repetition number:  16\n",
      "score =   27391.601223802158\n",
      "\n",
      "repetition number:  17\n",
      "score =   23709.23745932438\n",
      "\n",
      "repetition number:  18\n",
      "score =   25877.77755295005\n",
      "\n",
      "repetition number:  19\n",
      "score =   31664.912658039735\n",
      "\n",
      "repetition number:  20\n",
      "score =   23020.688305075575\n",
      "\n",
      "repetition number:  21\n",
      "score =   32029.917120068567\n",
      "\n",
      "repetition number:  22\n",
      "score =   23314.504272364433\n",
      "\n",
      "repetition number:  23\n",
      "score =   29683.7529223759\n",
      "\n",
      "repetition number:  24\n",
      "score =   25296.59359707356\n",
      "\n",
      "scoremean =  27035.14073694428\n",
      "scorestdev =  4197.061608660091\n",
      "\n",
      "_______________\n",
      "\n",
      "experiment:  79\n",
      "Boston_Housing\n",
      "\n",
      "ML_cmnd\n",
      "{'autoML_type': 'catboost'}\n",
      "\n",
      "assignnan\n",
      "{'injections': {'OverallQual': {'entry_ratio': {7: 1.0}}}}\n",
      "\n",
      "assigninfill\n",
      "{'MLinfill': 'OverallQual'}\n",
      "\n",
      "\n",
      "repetition number:  0\n",
      "score =   23986.54116218329\n",
      "\n",
      "repetition number:  1\n",
      "score =   27542.92255181151\n",
      "\n",
      "repetition number:  2\n",
      "score =   24781.519058589707\n",
      "\n",
      "repetition number:  3\n",
      "score =   28525.91055523994\n",
      "\n",
      "repetition number:  4\n",
      "score =   27320.871050400194\n",
      "\n",
      "repetition number:  5\n",
      "score =   29831.413383094103\n",
      "\n",
      "repetition number:  6\n",
      "score =   27557.878606984774\n",
      "\n",
      "repetition number:  7\n",
      "score =   37231.600810020136\n",
      "\n",
      "repetition number:  8\n",
      "score =   29568.35882514983\n",
      "\n",
      "repetition number:  9\n",
      "score =   25430.756814043732\n",
      "\n",
      "repetition number:  10\n",
      "score =   24440.360380247766\n",
      "\n",
      "repetition number:  11\n",
      "score =   35429.144046716596\n",
      "\n",
      "repetition number:  12\n",
      "score =   30799.46235356998\n",
      "\n",
      "repetition number:  13\n",
      "score =   25818.247894590368\n",
      "\n",
      "repetition number:  14\n",
      "score =   27172.802236997373\n",
      "\n",
      "repetition number:  15\n",
      "score =   23011.61488491942\n",
      "\n",
      "repetition number:  16\n",
      "score =   24917.366513679644\n",
      "\n",
      "repetition number:  17\n",
      "score =   27441.220123063602\n",
      "\n",
      "repetition number:  18\n",
      "score =   26327.583415019915\n",
      "\n",
      "repetition number:  19\n",
      "score =   31469.789925112585\n",
      "\n",
      "repetition number:  20\n",
      "score =   27259.641767549736\n",
      "\n",
      "repetition number:  21\n",
      "score =   25576.96359229647\n",
      "\n",
      "repetition number:  22\n",
      "score =   29254.06556407778\n",
      "\n",
      "repetition number:  23\n",
      "score =   31100.22213934566\n",
      "\n",
      "repetition number:  24\n",
      "score =   28778.820108792734\n",
      "\n",
      "scoremean =  28023.003110539878\n",
      "scorestdev =  3379.610108362301\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#Experiment loop: for each selected scenario, load the data, prepare it with automunge\n",
    "#under the scenario's assignnan / assigninfill settings, train catboost over repeated runs,\n",
    "#then record the aggregated validation metric and pickle the cumulative experiments_dict.\n",
    "\n",
    "# for experiment in experiments_dict:\n",
    "#first batch are for experiments without NArw\n",
    "#other two batches are scenarios of ML infill with NArw\n",
    "for experiment in list(range(160)) + list(range(300, 320, 1)) + list(range(380, 400, 1)):\n",
    "\n",
    "  #same dict object as the experiments_dict entry, so the update at the end of\n",
    "  #the iteration is recorded in experiments_dict itself\n",
    "  scenario = experiments_dict[experiment]\n",
    "\n",
    "  print('_______________')\n",
    "  print()\n",
    "  print('experiment: ', experiment)\n",
    "  print(scenario['dataset'])\n",
    "  print()\n",
    "\n",
    "  #fail fast on an unsupported model type, otherwise no scores would be logged\n",
    "  #and the aggregation below would fail with an unrelated error\n",
    "  modeltype = scenario['modeltype']\n",
    "  if modeltype not in ['classification', 'regression']:\n",
    "    raise ValueError('unsupported modeltype for experiment ' + str(experiment) + ': ' + str(modeltype))\n",
    "\n",
    "  #import data\n",
    "  #NOTE(review): error_bad_lines is deprecated in newer pandas in favor of on_bad_lines - confirm against installed version\n",
    "  df_train = pd.read_csv(scenario['path'], error_bad_lines=False)\n",
    "\n",
    "  #take top fifteen columns\n",
    "  df_train = df_train[scenario['topfifteen'] + [scenario['labels_column'], scenario['trainID_column']]]\n",
    "\n",
    "  #only assigncat is for classification convert to string for catboost\n",
    "  if modeltype == 'classification':\n",
    "    assigncat = {'lbos' : scenario['labels_column']}\n",
    "  else:\n",
    "    assigncat = {}\n",
    "\n",
    "  #this way NArw_marker only included on target column\n",
    "  transformdict = {'numeric' : {'auntsuncles' : ['nmbr']},\n",
    "                   'categoric' : {'auntsuncles' : ['1010']},\n",
    "                  }\n",
    "\n",
    "  if scenario['NArw_marker'] is True:\n",
    "    transformdict['numeric']['auntsuncles'].append('NArw')\n",
    "    transformdict['categoric']['auntsuncles'].append('NArw')\n",
    "\n",
    "  processdict = {'numeric' : {'functionpointer' : 'nmbr'},\n",
    "                 'categoric' : {'functionpointer' : '1010'},\n",
    "                }\n",
    "\n",
    "  #the target column is assigned to the custom 'numeric' / 'categoric' root category defined above\n",
    "  targetcolumn = scenario['targetcolumn']\n",
    "  if targetcolumn in [Housing_numeric_target, Allstate_numeric_target, IEEE_numeric_target]:\n",
    "    assigncat.update({'numeric' : targetcolumn})\n",
    "  elif targetcolumn in [Housing_categoric_target, Allstate_categoric_target, IEEE_categoric_target]:\n",
    "    assigncat.update({'categoric' : targetcolumn})\n",
    "\n",
    "  #labels and ID sets designation\n",
    "  labels_column = scenario['labels_column']\n",
    "  trainID_column = scenario['trainID_column']\n",
    "\n",
    "  #deep copies so that nothing passed to automunge can alter the stored scenario definition\n",
    "  ML_cmnd      = deepcopy(scenario['ML_cmnd'])\n",
    "  assignnan    = deepcopy(scenario['assignnan'])\n",
    "  assigninfill = deepcopy(scenario['assigninfill'])\n",
    "\n",
    "  #we'll use GPU 1\n",
    "#     GPU_device = '1'\n",
    "  #for macbook\n",
    "  GPU_device = '0'\n",
    "\n",
    "#   ML_cmnd.update({'MLinfill_cmnd' : {'catboost_classifier_model'   : {'task_type' : 'GPU', 'devices' : GPU_device },\n",
    "#                                      'catboost_regressor_model'    : {'task_type' : 'GPU', 'devices' : GPU_device }}})\n",
    "\n",
    "#     ML_cmnd.update({'MLinfill_cmnd' : {'catboost_classifier_model'   : {'devices' : GPU_device },\n",
    "#                                        'catboost_regressor_model'    : {'devices' : GPU_device }}})\n",
    "\n",
    "  print('ML_cmnd')\n",
    "  print(ML_cmnd)\n",
    "  print()\n",
    "  print('assignnan')\n",
    "  print(assignnan)\n",
    "  print()\n",
    "  print('assigninfill')\n",
    "  print(assigninfill)\n",
    "  print()\n",
    "\n",
    "  score_log = []\n",
    "  \n",
    "  #this is the number of repetitions for each experiment scenario\n",
    "  for j in range(25):\n",
    "\n",
    "    print()\n",
    "    print('repetition number: ', j)\n",
    "\n",
    "\n",
    "    #now prepare the data\n",
    "    #(the returned test sets are not used below, scoring is on the 25% validation split)\n",
    "    train, trainID, labels, \\\n",
    "    validation1, validationID1, validationlabels1, \\\n",
    "    test, testID, testlabels, \\\n",
    "    postprocess_dict \\\n",
    "    = am.automunge(df_train,\n",
    "                   labels_column = labels_column,\n",
    "                   trainID_column = trainID_column,\n",
    "                   valpercent=0.25,\n",
    "                   assigncat = assigncat,\n",
    "                   ML_cmnd = ML_cmnd,\n",
    "                   assignnan = assignnan,\n",
    "                   assigninfill = assigninfill,\n",
    "                   transformdict = transformdict,\n",
    "                   processdict = processdict,\n",
    "                   printstatus = False\n",
    "                  )\n",
    "\n",
    "    #catboost accepts categoric features designation\n",
    "    categorical_features_indices = \\\n",
    "    postprocess_dict['columntype_report']['boolean'] + postprocess_dict['columntype_report']['ordinal'] \\\n",
    "    + postprocess_dict['columntype_report']['onehot'] + postprocess_dict['columntype_report']['binary']\n",
    "\n",
    "    #now train our model and access the metric score on validation data\n",
    "    if modeltype == 'classification':\n",
    "\n",
    "      #metric = 'accuracy_score'\n",
    "      metric = 'roc_auc_score'\n",
    "\n",
    "#       model = CatBoostClassifier(task_type='GPU',\n",
    "#                                 devices = GPU_device,)\n",
    "\n",
    "      model = CatBoostClassifier(devices = GPU_device,)\n",
    "\n",
    "      model.fit(train, \n",
    "                labels,\n",
    "                eval_set=(validation1, validationlabels1),\n",
    "                cat_features= categorical_features_indices,\n",
    "                verbose=False,\n",
    "               )\n",
    "\n",
    "      #evaluate results on validation\n",
    "      #inferred_labels = model.predict(validation1)\n",
    "      inferred_labels = model.predict_proba(validation1)\n",
    "\n",
    "      #roc_auc_score expects the score of the greater (positive) class label,\n",
    "      #which is column 1 of predict_proba for a binary target\n",
    "      #(column 0 would report 1 - AUC)\n",
    "      #score = accuracy_score(validationlabels1.to_numpy().ravel(), inferred_labels)\n",
    "      score = roc_auc_score(validationlabels1.astype(int).to_numpy().ravel(), inferred_labels[:,1])\n",
    "\n",
    "    elif modeltype == 'regression':\n",
    "\n",
    "      metric = 'rmse'\n",
    "\n",
    "#       model = CatBoostRegressor(task_type='GPU',\n",
    "#                                 devices = GPU_device,)\n",
    "\n",
    "      model = CatBoostRegressor(devices = GPU_device,)\n",
    "\n",
    "      model.fit(train, \n",
    "                labels,\n",
    "                eval_set=(validation1, validationlabels1),\n",
    "                cat_features= categorical_features_indices,\n",
    "                verbose=False,\n",
    "               )\n",
    "\n",
    "      #evaluate results on validation\n",
    "      inferred_labels = model.predict(validation1)\n",
    "\n",
    "      #squared=False returns the root mean squared error\n",
    "      #NOTE(review): the squared argument is deprecated in newer scikit-learn - confirm against installed version\n",
    "      score = mean_squared_error(validationlabels1.to_numpy().ravel(), inferred_labels, squared=False)\n",
    "\n",
    "    #log and report the score of this repetition (shared by both model types)\n",
    "    score_log.append(score)\n",
    "\n",
    "    print('score =  ', score)\n",
    "\n",
    "  #now aggregate the repetitions\n",
    "  scoremean = sum(score_log) / len(score_log)\n",
    "  scorestdev = statistics.stdev(score_log)\n",
    "\n",
    "\n",
    "  print()\n",
    "  print('scoremean = ', scoremean)\n",
    "  print('scorestdev = ', scorestdev)\n",
    "  print()\n",
    "\n",
    "  scenario.update({'metric' : metric,\n",
    "                   'score'  : scoremean,\n",
    "                   'score_log' : score_log,\n",
    "                   'scorestdev' : scorestdev,\n",
    "                  })\n",
    "\n",
    "  #the full cumulative experiments_dict is saved after every scenario, one file per experiment number\n",
    "  experiments_dict_filename = 'infill_experiments_gpusetting4_' + str(experiment) + '.pickle'\n",
    "\n",
    "  with open(experiments_dict_filename, 'wb') as handle:\n",
    "    pickle.dump(experiments_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
