{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "02d2f247",
   "metadata": {},
   "outputs": [],
   "source": [
    "from Automunge import *\n",
    "am = AutoMunge()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c2cf345f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "462dc56d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train = pd.DataFrame({'feature' : ['twenty one', 'twenty two', 'twenty one', 'twenty two', 'three']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "3b4977f8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_______________\n",
      "Begin Automunge\n",
      "\n",
      "______\n",
      "\n",
      "versioning serial stamp:\n",
      "_8.30_999915197215\n",
      "\n",
      "Automunge returned train column set: \n",
      "['feature_1010_0', 'feature_1010_1']\n",
      "\n",
      "Automunge returned ID column set: \n",
      "['Automunge_index']\n",
      "\n",
      "_______________\n",
      "Automunge Complete\n",
      "\n"
     ]
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'1010':'feature'},\n",
    "            )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "88594e32",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_1010_0</th>\n",
       "      <th>feature_1010_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_1010_0  feature_1010_1\n",
       "0               1               0\n",
       "1               1               1\n",
       "2               1               0\n",
       "3               1               1\n",
       "4               0               1"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e02b3b0f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_onht_0</th>\n",
       "      <th>feature_onht_1</th>\n",
       "      <th>feature_onht_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_onht_0  feature_onht_1  feature_onht_2\n",
       "0               1               0               0\n",
       "1               0               1               0\n",
       "2               1               0               0\n",
       "3               0               1               0\n",
       "4               0               0               1"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'onht':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "8b77517a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_ord3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_ord3\n",
       "0             1\n",
       "1             2\n",
       "2             1\n",
       "3             2\n",
       "4             3"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'ord3':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "adfb388b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_ordl</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_ordl\n",
       "0             2\n",
       "1             3\n",
       "2             2\n",
       "3             3\n",
       "4             1"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'ordl':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "2e07ec54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_hsh2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_hsh2\n",
       "0             5\n",
       "1             5\n",
       "2             5\n",
       "3             5\n",
       "4             4"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'hsh2':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "d54ab394",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_or19_sp13_ord3</th>\n",
       "      <th>feature_or19_sp13_sp10_ord3</th>\n",
       "      <th>feature_or19_nmc8_nmbr</th>\n",
       "      <th>feature_or19_1010_0</th>\n",
       "      <th>feature_or19_1010_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_or19_sp13_ord3  feature_or19_sp13_sp10_ord3  \\\n",
       "0                       1                            1   \n",
       "1                       1                            1   \n",
       "2                       1                            1   \n",
       "3                       1                            1   \n",
       "4                       2                            1   \n",
       "\n",
       "   feature_or19_nmc8_nmbr  feature_or19_1010_0  feature_or19_1010_1  \n",
       "0                     0.0                    1                    0  \n",
       "1                     0.0                    1                    1  \n",
       "2                     0.0                    1                    0  \n",
       "3                     0.0                    1                    1  \n",
       "4                     0.0                    0                    1  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'or19':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c3b85f2a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>twenty one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>twenty two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>twenty one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>twenty two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>three</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      feature\n",
       "0  twenty one\n",
       "1  twenty two\n",
       "2  twenty one\n",
       "3  twenty two\n",
       "4       three"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "ce50fed2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature\n",
       "0        1\n",
       "1       13\n",
       "2        5\n",
       "3        0\n",
       "4        7"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train = pd.DataFrame({'feature' : [1, 13, 5, 0, 7]})\n",
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "1c8fe1b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_nmbr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.805313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.495582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.038348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.997054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.345134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_nmbr\n",
       "0     -0.805313\n",
       "1      1.495582\n",
       "2     -0.038348\n",
       "3     -0.997054\n",
       "4      0.345134"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "#              assigncat={'nmbr':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "472ec6d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/nicholasteague/opt/anaconda3/lib/python3.9/site-packages/sklearn/preprocessing/_data.py:2590: UserWarning: n_quantiles (1000) is greater than the total number of samples (5). n_quantiles is set to n_samples.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_qttf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.674490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.199337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-5.199337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.674490</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_qttf\n",
       "0     -0.674490\n",
       "1      5.199337\n",
       "2      0.000000\n",
       "3     -5.199337\n",
       "4      0.674490"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'qttf':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "a950de2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_nmbr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.805313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.495582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.038348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.997054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.345134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_nmbr\n",
       "0     -0.805313\n",
       "1      1.495582\n",
       "2     -0.038348\n",
       "3     -0.997054\n",
       "4      0.345134"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "#              assigncat={'qttf':'feature'},\n",
    "             powertransform=True,\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "a42b74d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_mnmx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.076923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.384615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.538462</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_mnmx\n",
       "0      0.076923\n",
       "1      1.000000\n",
       "2      0.384615\n",
       "3      0.000000\n",
       "4      0.538462"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'mnmx':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "f6934a37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_mnm4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.075472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.389937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.547170</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_mnm4\n",
       "0      0.075472\n",
       "1      1.000000\n",
       "2      0.389937\n",
       "3      0.000000\n",
       "4      0.547170"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "             assigncat={'mnm4':'feature'},\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "b2d04de6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_nmbr</th>\n",
       "      <th>feature_nmbr_bint_0</th>\n",
       "      <th>feature_nmbr_bint_1</th>\n",
       "      <th>feature_nmbr_bint_2</th>\n",
       "      <th>feature_nmbr_bint_3</th>\n",
       "      <th>feature_nmbr_bint_4</th>\n",
       "      <th>feature_nmbr_bint_5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.805313</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.495582</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.038348</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.997054</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.345134</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature_nmbr  feature_nmbr_bint_0  feature_nmbr_bint_1  \\\n",
       "0     -0.805313                    0                    0   \n",
       "1      1.495582                    0                    0   \n",
       "2     -0.038348                    0                    0   \n",
       "3     -0.997054                    0                    0   \n",
       "4      0.345134                    0                    0   \n",
       "\n",
       "   feature_nmbr_bint_2  feature_nmbr_bint_3  feature_nmbr_bint_4  \\\n",
       "0                    1                    0                    0   \n",
       "1                    0                    0                    1   \n",
       "2                    1                    0                    0   \n",
       "3                    1                    0                    0   \n",
       "4                    0                    1                    0   \n",
       "\n",
       "   feature_nmbr_bint_5  \n",
       "0                    0  \n",
       "1                    0  \n",
       "2                    0  \n",
       "3                    0  \n",
       "4                    0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train, train_ID, labels, \\\n",
    "val, val_ID, val_labels, \\\n",
    "test, test_ID, test_labels, \\\n",
    "postprocess_dict = \\\n",
    "am.automunge(df_train,\n",
    "            MLinfill=False,\n",
    "             shuffletrain=False,\n",
    "             NArw_marker = False,\n",
    "#              assigncat={'mnm4':'feature'},\n",
    "             binstransform=True,\n",
    "             printstatus=False,\n",
    "            )\n",
    "\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae5278ba",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
