"""
This file is part of Automunge which is released under GNU General Public License v3.0.
See file LICENSE or go to (anonymized) for full license details.

contact available via (anonymized)

Copyright (C) 2018, 2019, 2020 (anonymized) - All Rights Reserved

patent pending, applications (anonymized)
"""

#global imports
import numpy as np
import pandas as pd
from copy import deepcopy

#imports for process_time_class, postprocess_time_class
import datetime as dt

#imports for process_bxcx_class
from scipy import stats

#imports for process_hldy_class
from pandas.tseries.holiday import USFederalHolidayCalendar

#imports for evalcategory, getNArows
from collections import Counter
import datetime as dt
from scipy.stats import shapiro
from scipy.stats import skew

#imports for predictinfill, predictpostinfill, trainFSmodel
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error
# from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
#stats may be used for cases where user elects RandomizedSearchCV hyperparameter tuning
# from scipy import stats

#imports for shuffleaccuracy
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_log_error

#imports for PCA dimensionality reduction
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import KernelPCA

#imports for automunge
import random
#import datetime as dt
import types

class AutoMunge:
  
  def __init__(self):
    pass

  def assembletransformdict(self, binstransform, NArw_marker):
    '''
    #assembles the range of transformations to be applied based on the evaluated \
    #category of data
    #the primitives are intended as follows:
    #_greatgrandparents_: supplemental column derived from source column, only applied
    #to first generation, with downstream transforms included
    #_grandparents_: supplemental column derived from source column, only applied
    #to first generation
    #_parents_: replace source column, with downstream transforms performed
    #_siblings_: supplemental column derived from source column, \
    #with downstream transforms performed
    #_auntsuncles_: replace source column, without downstream transforms performed
    #_cousins_: supplemental column derived from source column, \
    #without downstream transforms performed
    #downstream transform primitives are:
    #_children_: becomes downstream parent
    #_niecesnephews_: treated like a downstream sibling
    #_coworkers_: becomes a downstream auntsuncles
    #_friends_: become downstream cousins    
    
    #for example, if we set 'bxcx' entry to have both 'bxcx' as parents and \
    #'nmbr' as cousin, then the output would be column_nmbr, column_bxcx_nmbr
    #(because 'bxcx' has a downstream primitive entry of 'nmbr' as well)
    
    #note a future extension will allow automunge class to run experiments
    #on different configurations of transform_dict to improve the feature selection
    '''

    transform_dict = {}

    #initialize bins based on what was passed through application of automunge(.)
    if binstransform is True:
      bint = 'bint'
    else:
      bint = None
        
    if NArw_marker is True:
      NArw = 'NArw'
    else:
      NArw = None

    #initialize transform_dict. Note in a future extension the range of categories
    #is intended to be built out
    transform_dict.update({'nmbr' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : [bint]}})
    
    transform_dict.update({'dxdt' : {'parents'       : ['dxdt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d2dt' : {'parents'       : ['d2dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dxdt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d3dt' : {'parents'       : ['d3dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d2dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d4dt' : {'parents'       : ['d4dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d3dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d5dt' : {'parents'       : ['d5dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d4dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d6dt' : {'parents'       : ['d6dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d5dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dxd2' : {'parents'       : ['dxd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d2d2' : {'parents'       : ['d2d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dxd2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d3d2' : {'parents'       : ['d3d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d2d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d4d2' : {'parents'       : ['d4d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d3d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d5d2' : {'parents'       : ['d5d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d4d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d6d2' : {'parents'       : ['d6d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d5d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmdx' : {'parents'       : ['nmdx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['dxdt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmd2' : {'parents'       : ['nmd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d2dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmd3' : {'parents'       : ['nmd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d3dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd4' : {'parents'       : ['nmd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d4dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd5' : {'parents'       : ['nmd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d5dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd6' : {'parents'       : ['nmd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d6dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmdx' : {'parents'       : ['mmdx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmd2' : {'parents'       : ['mmd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmdx'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmd3' : {'parents'       : ['mmd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd2'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd4' : {'parents'       : ['mmd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd3'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd5' : {'parents'       : ['mmd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd4'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd6' : {'parents'       : ['mmd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd5'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dddt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dddt', 'exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ddd2' : {'parents'       : ['ddd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dddt'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ddd3' : {'parents'       : ['ddd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd2'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd4' : {'parents'       : ['ddd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd3'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd5' : {'parents'       : ['ddd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd4'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd6' : {'parents'       : ['ddd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd5'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dedt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dedt', 'exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ded2' : {'parents'       : ['ded2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dedt'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ded3' : {'parents'       : ['ded3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded2'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded4' : {'parents'       : ['ded4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded3'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded5' : {'parents'       : ['ded5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded4'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded6' : {'parents'       : ['ded6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded5'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'shft' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shft'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'shf2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shf2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shf3'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'shf4' : {'parents'       : ['shf4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
  
    transform_dict.update({'shf5' : {'parents'       : ['shf5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf6' : {'parents'       : ['shf6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'shf7' : {'parents'       : ['shf4', 'shf5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf8' : {'parents'       : ['shf4', 'shf5', 'shf6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'bnry' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnry'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'onht' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['onht'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'onh2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['onht'], \
                                     'cousins'       : ['NArw'], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'text' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'txt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw, 'splt'], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'txt3' : {'parents'       : ['txt3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})

    transform_dict.update({'lngt' : {'parents'       : ['lngt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
  
    transform_dict.update({'lnlg' : {'parents'       : ['lnlg'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['log0'], \
                                     'friends'       : []}})

    transform_dict.update({'UPCS' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['UPCS'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Unht' : {'parents'       : ['Unht'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['onht'], \
                                     'friends'       : []}})
  
    transform_dict.update({'Utxt' : {'parents'       : ['Utxt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Utx2' : {'parents'       : ['Utx2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : ['splt']}})

    transform_dict.update({'Utx3' : {'parents'       : ['Utx3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['txt3'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Ucct' : {'parents'       : ['Ucct'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ucct', 'ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uord' : {'parents'       : ['Uord'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
        
    transform_dict.update({'Uor2' : {'parents'       : ['Uor2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['ord2'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uor3' : {'parents'       : ['Uor3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uor6' : {'parents'       : ['Uor6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['spl6'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'U101' : {'parents'       : ['U101'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'splt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['splt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl2' : {'parents'       : ['spl2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl5' : {'parents'       : ['spl5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl6' : {'parents'       : ['spl6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['splt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['ord3']}})
    
    transform_dict.update({'spl7' : {'parents'       : ['spl7'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'spl8' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['spl8'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'spl9' : {'parents'       : ['spl9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'sp10' : {'parents'       : ['sp10'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    
    transform_dict.update({'sp11' : {'parents'       : ['sp11'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['spl5'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp12' : {'parents'       : ['sp12'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp11'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp13' : {'parents'       : ['sp13'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp10'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp14' : {'parents'       : ['sp14'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp15' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp15'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'sp16' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp16'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp17' : {'parents'       : ['sp17'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['spl5'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp18' : {'parents'       : ['sp18'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp17'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'sp19' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp19'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sp20' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp20'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbst' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbst'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'srch' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['srch'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'src2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'src3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'src4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'aggt' : {'parents'       : ['aggt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'strn' : {'parents'       : ['strn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

  
    transform_dict.update({'strg' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['strg'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmrc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmrc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr2' : {'parents'       : ['nmr2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr3' : {'parents'       : ['nmr3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmr4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmr4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr5' : {'parents'       : ['nmr5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr6' : {'parents'       : ['nmr6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmr7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr8' : {'parents'       : ['nmr8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr9' : {'parents'       : ['nmr9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmcm' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmcm'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc2' : {'parents'       : ['nmc2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc3' : {'parents'       : ['nmc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmc4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmc4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc5' : {'parents'       : ['nmc5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc6' : {'parents'       : ['nmc6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmc7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmc7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc8' : {'parents'       : ['nmc8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc9' : {'parents'       : ['nmc9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmEU' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmEU'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE2' : {'parents'       : ['nmE2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE3' : {'parents'       : ['nmE3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmE4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE5' : {'parents'       : ['nmE5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE6' : {'parents'       : ['nmE6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmE7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE8' : {'parents'       : ['nmE8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE9' : {'parents'       : ['nmE9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors7' : {'parents'       : ['spl6', 'nmr2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors5' : {'parents'       : ['spl5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors6' : {'parents'       : ['spl6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ordl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ordl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
        
    transform_dict.update({'ord2' : {'parents'       : ['ord2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ord3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ord5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord5'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ucct' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ucct'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
        
    transform_dict.update({'ord4' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors2' : {'parents'       : ['spl2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or10' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or11' : {'parents'       : ['sp11'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'or12' : {'parents'       : ['nmr2'], \
                                     'siblings'      : ['sp11'], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or13' : {'parents'       : ['sp12'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or14' : {'parents'       : ['nmr2'], \
                                     'siblings'      : ['sp12'], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'or15' : {'parents'       : ['or15'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sp13'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
  
    transform_dict.update({'or16' : {'parents'       : ['or16'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmr2'], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or17' : {'parents'       : ['or17'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sp14'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or18' : {'parents'       : ['or18'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmr2'], \
                                     'niecesnephews' : ['sp14'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'or19' : {'parents'       : ['or19'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or20' : {'parents'       : ['or20'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp14'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or21' : {'parents'       : ['or21'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp17'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or22' : {'parents'       : ['or22'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp18'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'om10' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010', 'mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})

    transform_dict.update({'mmor' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'1010' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'null' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['null'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NArw' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NArw'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr3'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr4'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr5'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nbr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmbr'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nbr3' : {'parents'       : ['nbr3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['bint']}})
    
    transform_dict.update({'MADn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['MADn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'MAD2' : {'parents'       : ['MAD2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'MAD3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['MAD3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnmx' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm2' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm3' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : ['nmbr', NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm6'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx', 'bins'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'retn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'rtbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'bsor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'rtb2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'bins'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'mean' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mea2' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mea3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean', 'bins'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'date' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mnth', 'days', 'hour', 'mint', 'scnd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'dat2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bshr', 'wkdy', 'hldy'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mnsn', 'mncs', 'dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'dysn', 'dycs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'year' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'yea2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'yrsn', 'yrcs', 'mdsn', 'mdcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'yrcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['yrcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'yrsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['yrsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnth' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnth'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'mnt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn', 'mncs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn', 'mncs', 'dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs', 'dysn', 'dycs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mncs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mncs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mdsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mdcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'days' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['days'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'day2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn', 'dycs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms', 'dhmc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms', 'dhmc', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dysn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dycs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dycs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dhms' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dhmc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhmc'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hour' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hour'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'hrs2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn', 'hrcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrs3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrs4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hmss' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmss'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hmsc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmsc'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mint' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mint'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'min2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn', 'mics'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'min3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'min4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mssn', 'mscs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'misn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mics' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mics'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mssn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mssn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mscs' : {'parents'       : [], \
                                     'siblings': [], \
                                     'auntsuncles'   : ['mscs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'scnd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scnd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'scn2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'scsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'sccs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sccs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxcx' : {'parents'       : ['bxcx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc2' : {'parents'       : ['bxc2'], \
                                     'siblings'      : ['nmbr'], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc3' : {'parents'       : ['bxc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc4' : {'parents'       : ['bxc4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'bxc5' : {'parents'       : ['bxc5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2', 'bins'], \
                                     'friends'       : []}})

    transform_dict.update({'ntgr' : {'parents'       : ['ntgr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', '1010', 'ordl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ntg2' : {'parents'       : ['ntg2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', '1010', 'ordl', 'pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ntg3' : {'parents'       : ['ntg3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'ordl', 'por2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwrs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwrs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'log0' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['log0'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'log1' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['log0', 'pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'logn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['logn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'lgnm' : {'parents'       : ['lgnm'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nmbr'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sqrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sqrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'addd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['addd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'sbtr' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbtr'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mltp' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mltp'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'divd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['divd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'rais' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['rais'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'absl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['absl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt1' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt1'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'wkdy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['wkdy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bshr' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bshr'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hldy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hldy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'wkds' : {'parents'       : ['wkds'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
  
    transform_dict.update({'wkdo' : {'parents'       : ['wkdo'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnts' : {'parents'       : ['mnts'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
  
    transform_dict.update({'mnto' : {'parents'       : ['mnto'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bins' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bins'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'bint' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bint'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bsor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bsor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'btor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['btor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwK' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwK'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'bnwM' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwM'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'bnKo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnKo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnMo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnMo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})    
    
    transform_dict.update({'bnep' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnep'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bne7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bne7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bne9' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bne9'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bneo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bneo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bn7o' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bn7o'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'bn9o' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bn9o'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'tlbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['tlbn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'por2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['por2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'por3' : {'parents'       : ['por3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bkb3' : {'parents'       : ['bkb3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
  
    transform_dict.update({'bkb4' : {'parents'       : ['bkb4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bsbn' : {'parents'       : ['bsbn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwb' : {'parents'       : ['bnwb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnKb' : {'parents'       : ['bnKb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bnMb' : {'parents'       : ['bnMb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bneb' : {'parents'       : ['bneb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bn7b' : {'parents'       : ['bn7b'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bn9b' : {'parents'       : ['bn9b'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwbn' : {'parents'       : ['pwbn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'DPnb' : {'parents'       : ['DPn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPn3' : {'parents'       : ['DPn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPnb'], \
                                     'friends'       : []}})

    transform_dict.update({'DPmm' : {'parents'       : ['DPm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DPm2' : {'parents'       : ['DPm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPmm'], \
                                     'friends'       : []}})

    transform_dict.update({'DPrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['DPrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'DLnb' : {'parents'       : ['DLn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DLn3' : {'parents'       : ['DLn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DLnb'], \
                                     'friends'       : []}})

    transform_dict.update({'DLmm' : {'parents'       : ['DLm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DLm2' : {'parents'       : ['DLm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DLmm'], \
                                     'friends'       : []}})

    transform_dict.update({'DLrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['DLrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DPbn' : {'parents'       : ['DPb2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPb2' : {'parents'       : ['DPb2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPbn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPod' : {'parents'       : ['DPo4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo4' : {'parents'       : ['DPo4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPod'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPoh' : {'parents'       : ['DPo5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['onht'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo5' : {'parents'       : ['DPo5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['DPo2'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo2' : {'parents'       : ['DPo2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['onht'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DP10' : {'parents'       : ['DPo6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['1010'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo6' : {'parents'       : ['DPo6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['DPo3'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo3' : {'parents'       : ['DPo3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'copy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['copy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'excl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['excl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'exc2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'exc3' : {'parents'       : ['exc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['bins']}})
    
    transform_dict.update({'exc4' : {'parents'       : ['exc4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['pwr2']}})
    
    transform_dict.update({'exc5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc5'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    #exc6 was removed from the library and is now the same as excl; it is still included here so that prior code demonstrations don't trigger a printout
    transform_dict.update({'exc6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['excl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'shfl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shfl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmbd' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : [bint]}})

    transform_dict.update({'101d' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ordd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'texd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnrd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnry'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'datd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nuld' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['null'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbnm' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lb10' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lbo5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ordl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lbos' : {'parents'       : ['lbos'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['strg'], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbte' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbda' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    return transform_dict
  
  def assembleprocessdict(self):
    '''
    #creates a dictionary storing all of the processing functions for each
    #category. Note that the convention is that every dualprocess entry
    #(to process both train and test set in automunge) is meant
    #to have a corresponding postprocess entry (to process the test set in
    #postmunge). If the dualprocess/postprocess pair isn't included, a
    #singleprocess function will be used instead, which processes a single column
    #at a time and is neutral to whether that set is from train or test data.
    
    #note that the functionpointer entry is currently only available for user passed processdict
    #this internal library process_dict does not accept functionpointer entries
    
    #NArowtype entries are:
    # - 'numeric' for source columns with expected numeric entries
    # - 'integer' for source column with expected integer entries
    # - 'justNaN' for source columns that may have expected entries other than numeric
    # - 'exclude' for source columns that aren't needing NArow columns derived
    # - 'positivenumeric' for source columns with expected positive numeric entries
    # - 'nonnegativenumeric' for source columns with expected non-negative numeric (zero allowed)
    # - 'nonzeronumeric' for source columns with allowed positive and negative but no zero
    # - 'parsenumeric' marks for infill strings that don't contain any numeric character entries
    # - 'parsenumeric_commas' marks for infill strings that don't contain any numeric character entries, recognizes commas
    # - 'parsenumeric_EU' marks for infill strings that don't contain any numeric character entries, 
    #            recognizes international standard of period delimiter and comma decimal
    # - 'datetime' marks for infill cells that aren't recognized as datetime objects
    
    #MLinfilltype entries are:
    # - 'numeric' for single columns with numeric entries (such as could be floats)
    # - 'singlct' for single column sets with ordinal entries (integers)
    # - 'binary' for single column sets with boolean entries (0/1)
    # - 'multirt' for categorical multicolumn sets with boolean entries (0/1)
    # - 'concurrent_act' for multicolumn sets with boolean entries as may have 
    #multiple entries in the same row
    # - 'concurrent_nmbr' for multicolumn sets with numerical entries
    # - 'exclude' for columns which will be excluded from ML infill
    # - '1010' for binary encoded columns, will be converted to onehot for ML
    # - 'boolexclude' boolean set suitable for Binary transform but excluded from MLinfill
    # - 'totalexclude' sets excluded from all methods that inspect MLinfill, such as for excl category

    #at least one of sets of ('dualprocess' and 'postprocess') or ('singleprocess') needs to be specified
    #'inverseprocess' is optional and supports postmunge inversion
    #'info_retention' is optional boolean required with inversion to prioritize transforms with more information retention
    #'inplace_option' is optional boolean to signal when a transformation function accepts inplace operations
    #'labelctgy' is associated with feature importance and signals which transform is the target for the predictive model
    #for cases when a family tree returns multiple configurations and the category is applied to a label set

    #note to self that any future updates such as additional supported entries should be carried through to functionpointer functions
    '''
    
    process_dict = {}
    
    #categories are nmbr, bnry, text, date, bxcx, bins, bint, NArw, null
    #note a future extension will allow the definition of new categories 
    #to automunge

    #dual column functions
    process_dict.update({'nmbr' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'dxdt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d2dt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d3dt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d4dt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d5dt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d6dt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'dxd2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d2d2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d3d2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d4d2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d5d2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d6d2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmdx' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd2' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd3' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd4' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd5' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd6' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'mmdx' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd4' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd5' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd6' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'dddt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd4' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd5' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd6' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxdt_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'dedt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded4' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded5' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded6' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_dxd2_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'shft' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_shft_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'shft'}})
    #shf2 - shf8 entries: each is populated with a singleprocess function only
    #(dualprocess and postprocess are None) and all share inverseprocess_shft
    #shf2 / shf3 are their own labelctgy, shf4 - shf8 carry labelctgy 'retn'
    #NOTE(review): the shf4, shf7 and shf8 entries are identical as written here,
    #presumably they are differentiated elsewhere - confirm
    process_dict.update({
      'shf2': {
        'dualprocess': None,
        'singleprocess': self.process_shf2_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'shf2',
      },
      'shf3': {
        'dualprocess': None,
        'singleprocess': self.process_shf3_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'shf3',
      },
      'shf4': {
        'dualprocess': None,
        'singleprocess': self.process_shft_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'shf5': {
        'dualprocess': None,
        'singleprocess': self.process_shf2_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'shf6': {
        'dualprocess': None,
        'singleprocess': self.process_shf3_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'shf7': {
        'dualprocess': None,
        'singleprocess': self.process_shft_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'shf8': {
        'dualprocess': None,
        'singleprocess': self.process_shft_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_shft,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'exclude',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
    })
    #nbr2 / nbr3 entries: both reuse the process_numerical_class /
    #postprocess_numerical_class pair with inverseprocess_nmbr
    #and point to 'nmbr' as their labelctgy
    process_dict.update({
      'nbr2': {
        'dualprocess': self.process_numerical_class,
        'singleprocess': None,
        'postprocess': self.postprocess_numerical_class,
        'inverseprocess': self.inverseprocess_nmbr,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
      'nbr3': {
        'dualprocess': self.process_numerical_class,
        'singleprocess': None,
        'postprocess': self.postprocess_numerical_class,
        'inverseprocess': self.inverseprocess_nmbr,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
    })
    #MADn / MAD2 / MAD3 entries: MADn and MAD2 share the MADn dualprocess,
    #postprocess and inverseprocess functions (labelctgy 'MADn'),
    #MAD3 has its own set of functions (labelctgy 'MAD3')
    process_dict.update({
      'MADn': {
        'dualprocess': self.process_MADn_class,
        'singleprocess': None,
        'postprocess': self.postprocess_MADn_class,
        'inverseprocess': self.inverseprocess_MADn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'MADn',
      },
      'MAD2': {
        'dualprocess': self.process_MADn_class,
        'singleprocess': None,
        'postprocess': self.postprocess_MADn_class,
        'inverseprocess': self.inverseprocess_MADn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'MADn',
      },
      'MAD3': {
        'dualprocess': self.process_MAD3_class,
        'singleprocess': None,
        'postprocess': self.postprocess_MAD3_class,
        'inverseprocess': self.inverseprocess_MAD3,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'MAD3',
      },
    })
    #mnmx - mnm7 entries
    #mnmx / mnm2 / mnm5 / mnm6 reuse the mnmx functions (labelctgy 'mnmx'),
    #where mnm6 additionally passes defaultparams {'floor' : True} and is
    #flagged info_retention False
    #mnm3 / mnm4 reuse the mnm3 functions (labelctgy 'mnm3', info_retention False)
    #mnm7 has no processing functions populated and no inverseprocess,
    #info_retention or inplace_option keys
    process_dict.update({
      'mnmx': {
        'dualprocess': self.process_mnmx_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnmx_class,
        'inverseprocess': self.inverseprocess_mnmx,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnmx',
      },
      'mnm2': {
        'dualprocess': self.process_mnmx_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnmx_class,
        'inverseprocess': self.inverseprocess_mnmx,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnmx',
      },
      'mnm3': {
        'dualprocess': self.process_mnm3_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnm3_class,
        'inverseprocess': self.inverseprocess_mnm3,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnm3',
      },
      'mnm4': {
        'dualprocess': self.process_mnm3_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnm3_class,
        'inverseprocess': self.inverseprocess_mnm3,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnm3',
      },
      'mnm5': {
        'dualprocess': self.process_mnmx_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnmx_class,
        'inverseprocess': self.inverseprocess_mnmx,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnmx',
      },
      'mnm6': {
        'dualprocess': self.process_mnmx_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mnmx_class,
        'inverseprocess': self.inverseprocess_mnmx,
        'defaultparams': {'floor': True},
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnmx',
      },
      'mnm7': {
        'dualprocess': None,
        'singleprocess': None,
        'postprocess': None,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mnm7',
      },
    })
    #retn / rtbn / rtb2 entries: all three are populated identically with the
    #retn dualprocess, postprocess and inverseprocess functions and labelctgy 'retn'
    process_dict.update({
      'retn': {
        'dualprocess': self.process_retn_class,
        'singleprocess': None,
        'postprocess': self.postprocess_retn_class,
        'inverseprocess': self.inverseprocess_retn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'rtbn': {
        'dualprocess': self.process_retn_class,
        'singleprocess': None,
        'postprocess': self.postprocess_retn_class,
        'inverseprocess': self.inverseprocess_retn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
      'rtb2': {
        'dualprocess': self.process_retn_class,
        'singleprocess': None,
        'postprocess': self.postprocess_retn_class,
        'inverseprocess': self.inverseprocess_retn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'retn',
      },
    })
    #mean / mea2 / mea3 entries: all three are populated identically with the
    #mean dualprocess, postprocess and inverseprocess functions and labelctgy 'mean'
    process_dict.update({
      'mean': {
        'dualprocess': self.process_mean_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mean_class,
        'inverseprocess': self.inverseprocess_mean,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mean',
      },
      'mea2': {
        'dualprocess': self.process_mean_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mean_class,
        'inverseprocess': self.inverseprocess_mean,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mean',
      },
      'mea3': {
        'dualprocess': self.process_mean_class,
        'singleprocess': None,
        'postprocess': self.postprocess_mean_class,
        'inverseprocess': self.inverseprocess_mean,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mean',
      },
    })
    #bnry / bnr2 entries: each has its own dualprocess / postprocess pair
    #(binary vs binary2) while both share inverseprocess_bnry,
    #both use NArowtype 'justNaN' and MLinfilltype 'binary'
    process_dict.update({
      'bnry': {
        'dualprocess': self.process_binary_class,
        'singleprocess': None,
        'postprocess': self.postprocess_binary_class,
        'inverseprocess': self.inverseprocess_bnry,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'binary',
        'labelctgy': 'bnry',
      },
      'bnr2': {
        'dualprocess': self.process_binary2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_binary2_class,
        'inverseprocess': self.inverseprocess_bnry,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'binary',
        'labelctgy': 'bnr2',
      },
    })
    #onht / onh2 / text / txt2 / txt3 entries (none of these set inplace_option)
    #onht and onh2 share the onht functions, text and txt2 share the text functions,
    #all four with MLinfilltype 'multirt'
    #txt3 is populated with the spl2 functions, MLinfilltype 'singlct' and
    #info_retention False, with labelctgy 'text'
    process_dict.update({
      'onht': {
        'dualprocess': self.process_onht_class,
        'singleprocess': None,
        'postprocess': self.postprocess_onht_class,
        'inverseprocess': self.inverseprocess_onht,
        'info_retention': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'onht',
      },
      'onh2': {
        'dualprocess': self.process_onht_class,
        'singleprocess': None,
        'postprocess': self.postprocess_onht_class,
        'inverseprocess': self.inverseprocess_onht,
        'info_retention': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'onht',
      },
      'text': {
        'dualprocess': self.process_text_class,
        'singleprocess': None,
        'postprocess': self.postprocess_text_class,
        'inverseprocess': self.inverseprocess_text,
        'info_retention': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'text',
      },
      'txt2': {
        'dualprocess': self.process_text_class,
        'singleprocess': None,
        'postprocess': self.postprocess_text_class,
        'inverseprocess': self.inverseprocess_text,
        'info_retention': True,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'text',
      },
      'txt3': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'singlct',
        'labelctgy': 'text',
      },
    })
    #lngt / lnlg entries: both apply process_lngt_class as a singleprocess
    #and differ only in labelctgy ('mnmx' vs 'log0')
    #neither entry sets inverseprocess, info_retention or inplace_option
    process_dict.update({
      'lngt': {
        'dualprocess': None,
        'singleprocess': self.process_lngt_class,
        'postprocess': None,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'singlct',
        'labelctgy': 'mnmx',
      },
      'lnlg': {
        'dualprocess': None,
        'singleprocess': self.process_lngt_class,
        'postprocess': None,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'singlct',
        'labelctgy': 'log0',
      },
    })
    #UPCS family entries (UPCS, Unht, Utxt, Utx2, Utx3, Ucct, Uord, Uor2, Uor3, Uor6, U101)
    #every entry applies process_UPCS_class as a singleprocess with
    #inverseprocess_UPCS, info_retention False, NArowtype 'justNaN' and
    #MLinfilltype 'exclude', the entries differ only in their labelctgy
    process_dict.update({
      'UPCS': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'exclude',
      },
      'Unht': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'onht',
      },
      'Utxt': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'text',
      },
      'Utx2': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'text',
      },
      'Utx3': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'text',
      },
      'Ucct': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ucct',
      },
      'Uord': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ordl',
      },
      'Uor2': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'mnmx',
      },
      'Uor3': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'Uor6': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'U101': {
        'dualprocess': None,
        'singleprocess': self.process_UPCS_class,
        'postprocess': None,
        'inverseprocess': self.inverseprocess_UPCS,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': '1010',
      },
    })
    #splt / spl2 / spl5 - sp10 entries (all flagged info_retention False)
    #splt and spl8 use the splt functions with MLinfilltype 'multirt' and labelctgy 'splt'
    #spl2, spl5, spl6, spl7, spl9, sp10 use the spl2 functions with labelctgy 'ord3'
    #variations between entries are passed through defaultparams
    #(suffix, test_same_as_train, consolidate_nonoverlaps, and minsplit for spl7)
    #note that splt and spl6 do not populate a defaultparams key
    process_dict.update({
      'splt': {
        'dualprocess': self.process_splt_class,
        'singleprocess': None,
        'postprocess': self.postprocess_splt_class,
        'inverseprocess': self.inverseprocess_splt,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'splt',
      },
      'spl2': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl2',
          'test_same_as_train': False,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'spl5': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl5',
          'test_same_as_train': False,
          'consolidate_nonoverlaps': True,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'spl6': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'NArowtype': 'justNaN',
        'MLinfilltype': 'singlct',
        'labelctgy': 'ord3',
      },
      'spl7': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl7',
          'test_same_as_train': False,
          'consolidate_nonoverlaps': True,
          'minsplit': 1,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'spl8': {
        'dualprocess': self.process_splt_class,
        'singleprocess': None,
        'postprocess': self.postprocess_splt_class,
        'inverseprocess': self.inverseprocess_splt,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl8',
          'test_same_as_train': True,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'multirt',
        'labelctgy': 'splt',
      },
      'spl9': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl9',
          'test_same_as_train': True,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'sp10': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_sp10',
          'test_same_as_train': True,
          'consolidate_nonoverlaps': True,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
    })
    #sp11 - sp15 entries (all flagged info_retention False)
    #sp11 - sp14 use the spl2 functions with MLinfilltype 'exclude' and labelctgy 'ord3'
    #sp15 uses the splt functions with concurrent_activations turned on in
    #defaultparams, MLinfilltype 'concurrent_act' and labelctgy 'splt'
    #NOTE(review): sp11 / sp12 pass suffix '_spl2' and sp13 / sp14 pass suffix '_spl9'
    #rather than a suffix matching their own category key,
    #presumably intentional - confirm
    process_dict.update({
      'sp11': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl2',
          'test_same_as_train': False,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'sp12': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl2',
          'test_same_as_train': False,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'sp13': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl9',
          'test_same_as_train': True,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'sp14': {
        'dualprocess': self.process_spl2_class,
        'singleprocess': None,
        'postprocess': self.postprocess_spl2_class,
        'inverseprocess': self.inverseprocess_spl2,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_spl9',
          'test_same_as_train': True,
          'consolidate_nonoverlaps': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'exclude',
        'labelctgy': 'ord3',
      },
      'sp15': {
        'dualprocess': self.process_splt_class,
        'singleprocess': None,
        'postprocess': self.postprocess_splt_class,
        'inverseprocess': self.inverseprocess_splt,
        'info_retention': False,
        'defaultparams': {
          'suffix': '_sp15',
          'concurrent_activations': True,
          'test_same_as_train': False,
        },
        'NArowtype': 'justNaN',
        'MLinfilltype': 'concurrent_act',
        'labelctgy': 'splt',
      },
    })
    process_dict.update({'sp16' : {'dualprocess' : self.process_splt_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_splt_class, \
                                  'inverseprocess' : self.inverseprocess_splt, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp16', \
                                                     'concurrent_activations': True, \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'splt'}})
    process_dict.update({'sp17' : {'dualprocess' : self.process_spl2_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_spl2_class, \
                                   'inverseprocess' : self.inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp18' : {'dualprocess' : self.process_spl2_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_spl2_class, \
                                   'inverseprocess' : self.inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp19' : {'dualprocess' : self.process_sp19_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sp19_class, \
                                  'inverseprocess' : self.inverseprocess_sp19, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sp19'}})
    process_dict.update({'sp20' : {'dualprocess' : self.process_sp19_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sp19_class, \
                                  'inverseprocess' : self.inverseprocess_sp19, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp20', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sp19'}})
    process_dict.update({'sbst' : {'dualprocess' : self.process_sbst_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sbst_class, \
                                  'inverseprocess' : self.inverseprocess_sbst, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbst', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'sbst'}})
    process_dict.update({'sbs2' : {'dualprocess' : self.process_sbst_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sbst_class, \
                                  'inverseprocess' : self.inverseprocess_sbst, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs2', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'sbst'}})
    process_dict.update({'sbs3' : {'dualprocess' : self.process_sbs3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sbs3_class, \
                                  'inverseprocess' : self.inverseprocess_sbs3, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs3', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sbs3'}})
    process_dict.update({'sbs4' : {'dualprocess' : self.process_sbs3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sbs3_class, \
                                  'inverseprocess' : self.inverseprocess_sbs3, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sbs3'}})
    process_dict.update({'srch' : {'dualprocess' : self.process_srch_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_srch_class, \
                                   'inverseprocess' : self.inverseprocess_srch, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'srch'}})
    process_dict.update({'src2' : {'dualprocess' : self.process_src2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_src2_class, \
                                   'inverseprocess' : self.inverseprocess_src2, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'src2'}})
    process_dict.update({'src3' : {'dualprocess' : self.process_src3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_src3_class, \
                                   'inverseprocess' : self.inverseprocess_src3, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'src3'}})
    process_dict.update({'src4' : {'dualprocess' : self.process_src4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_src4_class, \
                                   'inverseprocess' : self.inverseprocess_src4, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'src4'}})
    process_dict.update({'aggt' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_aggt_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'strn' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_strn_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'strg' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_strg_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_strg, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'nmrc' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmr4' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr5' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr6' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmr7' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr8' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr9' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmcm' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmcm'}})
    process_dict.update({'nmc2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmc4' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmc5' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc6' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmc7' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmc8' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc9' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_commas', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmEU' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_nmrc_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmE4' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE5' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE6' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmE7' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE8' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE9' : {'dualprocess' : self.process_nmr4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_nmr4_class, \
                                  'inverseprocess' : self.inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric_EU', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ors7' : {'dualprocess' : self.process_spl2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_spl2_class, \
                                  'inverseprocess' : self.inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ors5' : {'dualprocess' : self.process_spl2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_spl2_class, \
                                  'inverseprocess' : self.inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ors6' : {'dualprocess' : self.process_spl2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_spl2_class, \
                                  'inverseprocess' : self.inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ordl' : {'dualprocess' : self.process_ordl_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ordl_class, \
                                  'inverseprocess' : self.inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'ord2' : {'dualprocess' : self.process_ordl_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ordl_class, \
                                  'inverseprocess' : self.inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ord3' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ord5' : {'dualprocess' : self.process_ordl_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ordl_class, \
                                  'inverseprocess' : self.inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'ucct' : {'dualprocess' : self.process_ucct_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ucct_class, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'ucct'}})
    process_dict.update({'ord4' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ors2' : {'dualprocess' : self.process_spl2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_spl2_class, \
                                  'inverseprocess' : self.inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'or10' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'or11' : {'dualprocess' : self.process_1010_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_1010_class, \
                                   'inverseprocess' : self.inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or12' : {'dualprocess' : self.process_1010_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_1010_class, \
                                   'inverseprocess' : self.inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or13' : {'dualprocess' : self.process_1010_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_1010_class, \
                                   'inverseprocess' : self.inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or14' : {'dualprocess' : self.process_1010_class, \
                                   'singleprocess' : None, \
                                   'postprocess' : self.postprocess_1010_class, \
                                   'inverseprocess' : self.inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or15' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or16' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or17' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or18' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or19' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or20' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or21' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or22' : {'dualprocess' : None, \
                                   'singleprocess' : self.process_UPCS_class, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self.inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'om10' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mmor' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'1010' : {'dualprocess' : self.process_1010_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_1010_class, \
                                  'inverseprocess' : self.inverseprocess_1010, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bxcx' : {'dualprocess' : self.process_bxcx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bxcx_class, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'tmsc' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'time' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'date' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'dat2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'hldy'}})
    process_dict.update({'dat3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat5' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat6' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'year' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'inverseprocess' : self.inverseprocess_year, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_year', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'yea2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'yrsn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_yrsn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'yrcs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_yrcs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnth' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mnth', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'mnt2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt5' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt6' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnsn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mnsn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mncs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mncs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mdsn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'monthday', \
                                                     'suffix' : '_mdsn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mdcs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'monthday', \
                                                     'suffix' : '_mdcs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'days' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_days', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'day2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day5' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dysn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_dysn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dycs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_dycs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dhms' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'dayhourminute', \
                                                     'suffix' : '_dhms', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dhmc' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'dayhourminute', \
                                                     'suffix' : '_dhmc', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hour' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hour', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'hrs2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrs3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrs4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrsn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hrsn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrcs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hrcs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hmss' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'hourminutesecond', \
                                                     'suffix' : '_hmss', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hmsc' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'hourminutesecond', \
                                                     'suffix' : '_hmsc', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mint' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'defaultparams' : {'scale' : 'minute', \
                                                     'suffix' : '_mint', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'min2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'min3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'min4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'misn'}})
    process_dict.update({'misn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'minute', \
                                                     'suffix' : '_misn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mics' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'minute', \
                                                     'suffix' : '_mics', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mics'}})
    process_dict.update({'mssn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'minutesecond', \
                                                     'suffix' : '_mssn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mscs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'minutesecond', \
                                                     'suffix' : '_mscs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'scnd' : {'dualprocess' : self.process_time_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_time_class, \
                                  'defaultparams' : {'scale' : 'second', \
                                                     'suffix' : '_scnd', \
                                                     'normalization' : 'zscore'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'scn2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'scsn' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'second', \
                                                     'suffix' : '_scsn', \
                                                     'function' : 'sin'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'sccs' : {'dualprocess' : self.process_tmsc_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tmsc_class, \
                                  'defaultparams' : {'scale' : 'second', \
                                                     'suffix' : '_sccs', \
                                                     'function' : 'cos'}, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'bxc2' : {'dualprocess' : self.process_bxcx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bxcx_class, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'bxc3' : {'dualprocess' : self.process_bxcx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bxcx_class, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'bxc4' : {'dualprocess' : self.process_bxcx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bxcx_class, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'bxc5' : {'dualprocess' : self.process_bxcx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bxcx_class, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'ntgr' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ntg2' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ntg3' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'pwrs' : {'dualprocess' : self.process_pwrs_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwrs_class, \
                                  'inverseprocess' : self.inverseprocess_pwr2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'pwrs'}})
    process_dict.update({'pwr2' : {'dualprocess' : self.process_pwrs_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwrs_class, \
                                  'inverseprocess' : self.inverseprocess_pwr2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'negvalues' : True}, \
                                  'NArowtype' : 'nonzeronumeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'pwrs'}})
    process_dict.update({'log0' : {'dualprocess' : self.process_log0_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_log0_class, \
                                  'inverseprocess' : self.inverseprocess_log0, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'log0'}})
    process_dict.update({'log1' : {'dualprocess' : self.process_log0_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_log0_class, \
                                  'inverseprocess' : self.inverseprocess_log0, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'log0'}})
    process_dict.update({'logn' : {'dualprocess' : self.process_logn_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_logn_class, \
                                  'inverseprocess' : self.inverseprocess_logn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'logn'}})
    process_dict.update({'lgnm' : {'dualprocess' : self.process_logn_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_logn_class, \
                                  'inverseprocess' : self.inverseprocess_logn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'sqrt' : {'dualprocess' : self.process_sqrt_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sqrt_class, \
                                  'inverseprocess' : self.inverseprocess_sqrt, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'nonnegativenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'sqrt'}})
    process_dict.update({'addd' : {'dualprocess' : self.process_addd_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_addd_class, \
                                  'inverseprocess' : self.inverseprocess_addd, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'addd'}})
    process_dict.update({'sbtr' : {'dualprocess' : self.process_sbtr_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_sbtr_class, \
                                  'inverseprocess' : self.inverseprocess_sbtr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'sbtr'}})
    process_dict.update({'mltp' : {'dualprocess' : self.process_mltp_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_mltp_class, \
                                  'inverseprocess' : self.inverseprocess_mltp, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mltp'}})
    process_dict.update({'divd' : {'dualprocess' : self.process_divd_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_divd_class, \
                                  'inverseprocess' : self.inverseprocess_divd, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'divd'}})
    process_dict.update({'rais' : {'dualprocess' : self.process_rais_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_rais_class, \
                                  'inverseprocess' : self.inverseprocess_rais, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'rais'}})
    process_dict.update({'absl' : {'dualprocess' : self.process_absl_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_absl_class, \
                                  'inverseprocess' : self.inverseprocess_absl, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'absl'}})
    process_dict.update({'bkt1' : {'dualprocess' : self.process_bkt1_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt1_class, \
                                  'inverseprocess' : self.inverseprocess_bkt1, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bkt1'}})
    process_dict.update({'bkt2' : {'dualprocess' : self.process_bkt2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt2_class, \
                                  'inverseprocess' : self.inverseprocess_bkt2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bkt2'}})
    process_dict.update({'bkt3' : {'dualprocess' : self.process_bkt3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt3_class, \
                                  'inverseprocess' : self.inverseprocess_bkt3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bkt3'}})
    process_dict.update({'bkt4' : {'dualprocess' : self.process_bkt4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt4_class, \
                                  'inverseprocess' : self.inverseprocess_bkt4, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bkt4'}})
    process_dict.update({'wkdy' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_wkdy_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'wkdy'}})
    process_dict.update({'bshr' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_bshr_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'bshr'}})
    process_dict.update({'hldy' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_hldy_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'hldy'}})
    process_dict.update({'wkds' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_wkds_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'wkdo' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_wkds_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'mnts' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_mnts_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'mnto' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_mnts_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'bins' : {'dualprocess' : self.process_bins_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bins_class, \
                                  'inverseprocess' : self.inverseprocess_bins, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bins'}})
    process_dict.update({'bint' : {'dualprocess' : self.process_bins_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bins_class, \
                                  'inverseprocess' : self.inverseprocess_bins, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'normalizedinput' : True}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bins'}})
    process_dict.update({'bsor' : {'dualprocess' : self.process_bsor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bsor_class, \
                                  'inverseprocess' : self.inverseprocess_bsor, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bsor'}})
    process_dict.update({'btor' : {'dualprocess' : self.process_bsor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bsor_class, \
                                  'inverseprocess' : self.inverseprocess_bsor, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'normalizedinput' : True}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bsor'}})
    process_dict.update({'bnwd' : {'dualprocess' : self.process_bnwd_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwd_class, \
                                  'inverseprocess' : self.inverseprocess_bnwd, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnwd'}})
    process_dict.update({'bnwK' : {'dualprocess' : self.process_bnwd_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwd_class, \
                                  'inverseprocess' : self.inverseprocess_bnwd, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnwK', 'width':1000}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnwd'}})
    process_dict.update({'bnwM' : {'dualprocess' : self.process_bnwd_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwd_class, \
                                  'inverseprocess' : self.inverseprocess_bnwd, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnwM', 'width':1000000}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnwd'}})
    process_dict.update({'bnwo' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bnwo'}})
    process_dict.update({'bnKo' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix':'_bnKo', 'width':1000}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bnwo'}})
    process_dict.update({'bnMo' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix':'_bnMo', 'width':1000000}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bnwo'}})
    #'bnep', 'bne7', 'bne9' entries: all three share process_bnep_class /
    #postprocess_bnep_class / inverseprocess_bnep, with NArowtype 'numeric',
    #MLinfilltype 'multirt' and labelctgy 'bnep'
    #'bnep' carries no defaultparams, while 'bne7' and 'bne9' supply
    #bincount 7 and 9 respectively along with a matching suffix
    #NOTE(review): none of these three entries set 'inplace_option' - confirm that is intended
    process_dict.update({'bnep' : {'dualprocess' : self.process_bnep_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnep_class, \
                                  'inverseprocess' : self.inverseprocess_bnep, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnep'}})
    process_dict.update({'bne7' : {'dualprocess' : self.process_bnep_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnep_class, \
                                  'inverseprocess' : self.inverseprocess_bnep, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bne7', 'bincount':7}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnep'}})
    process_dict.update({'bne9' : {'dualprocess' : self.process_bnep_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnep_class, \
                                  'inverseprocess' : self.inverseprocess_bnep, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bne9', 'bincount':9}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnep'}})
    #'bneo', 'bn7o', 'bn9o' entries: all three share process_bneo_class /
    #postprocess_bneo_class / inverseprocess_bneo, set 'inplace_option' to True,
    #and use NArowtype 'numeric', MLinfilltype 'singlct', labelctgy 'bneo'
    #'bneo' carries no defaultparams, while 'bn7o' and 'bn9o' supply
    #bincount 7 and 9 respectively along with a matching suffix
    process_dict.update({'bneo' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bneo'}})
    process_dict.update({'bn7o' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn7o', 'bincount':7}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bneo'}})
    process_dict.update({'bn9o' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn9o', 'bincount':9}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bneo'}})
    #'tlbn' entry: dualprocess / postprocess pair with MLinfilltype 'concurrent_nmbr'
    #this entry defines no 'inverseprocess', 'info_retention' or 'inplace_option' keys
    #NOTE(review): presumably inversion is not supported for tlbn - confirm against
    #how consumers of process_dict treat a missing 'inverseprocess' key
    process_dict.update({'tlbn' : {'dualprocess' : self.process_tlbn_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_tlbn_class, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'concurrent_nmbr', \
                                  'labelctgy' : 'tlbn'}})
    #'pwor', 'por2', 'por3' entries: all three share process_pwor_class /
    #postprocess_pwor_class and use inverseprocess_por2 for inversion
    #'pwor' uses NArowtype 'positivenumeric' with no defaultparams
    #'por2' and 'por3' supply defaultparams {'negvalues' : True} with NArowtype 'nonzeronumeric'
    #'por2' and 'por3' differ from each other only in labelctgy ('pwor' vs '1010')
    process_dict.update({'pwor' : {'dualprocess' : self.process_pwor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwor_class, \
                                  'inverseprocess' : self.inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'pwor'}})
    process_dict.update({'por2' : {'dualprocess' : self.process_pwor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwor_class, \
                                  'inverseprocess' : self.inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'negvalues' : True}, \
                                  'NArowtype' : 'nonzeronumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'pwor'}})
    process_dict.update({'por3' : {'dualprocess' : self.process_pwor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwor_class, \
                                  'inverseprocess' : self.inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'negvalues' : True}, \
                                  'NArowtype' : 'nonzeronumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'bkb3', 'bkb4', 'bsbn' entries: register the bkt3, bkt4 and bsor function sets
    #respectively under separate category keys, each with labelctgy '1010'
    #all three set 'inplace_option' to True and carry no defaultparams
    process_dict.update({'bkb3' : {'dualprocess' : self.process_bkt3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt3_class, \
                                  'inverseprocess' : self.inverseprocess_bkt3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bkb4' : {'dualprocess' : self.process_bkt4_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bkt4_class, \
                                  'inverseprocess' : self.inverseprocess_bkt4, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bsbn' : {'dualprocess' : self.process_bsor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bsor_class, \
                                  'inverseprocess' : self.inverseprocess_bsor, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'bnwb', 'bnKb', 'bnMb' entries: all three share the bnwo function set
    #(process_bnwo_class / postprocess_bnwo_class / inverseprocess_bnwo) with labelctgy '1010'
    #'bnwb' carries no defaultparams, 'bnKb' supplies width 1000, 'bnMb' supplies width 1000000
    #NOTE(review): the suffixes given for 'bnKb' and 'bnMb' are '_bnKo' and '_bnMo',
    #which do not match the entry keys - confirm this reuse is intentional and
    #not a copy/paste carryover
    process_dict.update({'bnwb' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bnKb' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnKo', 'width':1000}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bnMb' : {'dualprocess' : self.process_bnwo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bnwo_class, \
                                  'inverseprocess' : self.inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnMo', 'width':1000000}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'bneb', 'bn7b', 'bn9b' entries: all three share the bneo function set
    #(process_bneo_class / postprocess_bneo_class / inverseprocess_bneo) with labelctgy '1010'
    #'bneb' carries no defaultparams, 'bn7b' supplies bincount 7, 'bn9b' supplies bincount 9
    #NOTE(review): the suffixes given for 'bn7b' and 'bn9b' are '_bn7o' and '_bn9o',
    #which do not match the entry keys - confirm this reuse is intentional and
    #not a copy/paste carryover
    process_dict.update({'bneb' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bn7b' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn7o', 'bincount':7}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bn9b' : {'dualprocess' : self.process_bneo_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_bneo_class, \
                                  'inverseprocess' : self.inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn9o', 'bincount':9}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'pwbn' entry: shares process_pwor_class / postprocess_pwor_class / inverseprocess_por2
    #with NArowtype 'positivenumeric' and labelctgy '1010'
    #NOTE(review): unlike the 'pwor' entry built on the same functions, this entry
    #does not set 'inplace_option' - confirm whether the omission is deliberate
    process_dict.update({'pwbn' : {'dualprocess' : self.process_pwor_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_pwor_class, \
                                  'inverseprocess' : self.inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'DPn3', 'DPnb', 'DPm2', 'DPmm', 'DPrt' entries: numeric DP categories,
    #all with NArowtype 'numeric', MLinfilltype 'numeric' and 'info_retention' True
    #'DPn3' and 'DPm2' register the plain numerical / mnmx function sets and point
    #their labelctgy at 'DPnb' and 'DPmm' respectively
    #'DPnb' and 'DPmm' use dedicated DP process functions with inverseprocess_UPCS
    #'DPrt' uses dedicated DP process functions with inverseprocess_retn
    #NOTE(review): 'DPnb' sets 'inplace_option' while 'DPmm' and 'DPrt' do not - confirm intended
    process_dict.update({'DPn3' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPnb'}})
    process_dict.update({'DPnb' : {'dualprocess' : self.process_DPnb_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPnb_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPnb'}})
    process_dict.update({'DPm2' : {'dualprocess' : self.process_mnmx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_mnmx_class, \
                                  'inverseprocess' : self.inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPmm'}})
    process_dict.update({'DPmm' : {'dualprocess' : self.process_DPmm_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPmm_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPmm'}})
    process_dict.update({'DPrt' : {'dualprocess' : self.process_DPrt_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPrt_class, \
                                  'inverseprocess' : self.inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPrt'}})
    #'DLn3', 'DLnb', 'DLm2', 'DLmm', 'DLrt' entries: mirror the DP numeric categories
    #'DLn3' and 'DLm2' register the plain numerical / mnmx function sets and point
    #their labelctgy at 'DLnb' and 'DLmm' respectively
    #'DLnb', 'DLmm', 'DLrt' reuse the DPnb / DPmm / DPrt process functions and
    #supply defaultparams {'noisedistribution' : 'laplace'}
    #NOTE(review): 'DLnb' does not set 'inplace_option' although the 'DPnb' entry
    #built on the same functions does - confirm whether the difference is deliberate
    process_dict.update({'DLn3' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLnb'}})
    process_dict.update({'DLnb' : {'dualprocess' : self.process_DPnb_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPnb_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLnb'}})
    process_dict.update({'DLm2' : {'dualprocess' : self.process_mnmx_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_mnmx_class, \
                                  'inverseprocess' : self.inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLmm'}})
    process_dict.update({'DLmm' : {'dualprocess' : self.process_DPmm_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPmm_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLmm'}})
    process_dict.update({'DLrt' : {'dualprocess' : self.process_DPrt_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPrt_class, \
                                  'inverseprocess' : self.inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLrt'}})
    #'DPb2' and 'DPbn' entries: both use NArowtype 'justNaN', MLinfilltype 'binary'
    #and labelctgy 'DPbn'
    #'DPb2' registers the plain binary function set (with inverseprocess_bnry)
    #and sets 'inplace_option'
    #'DPbn' uses dedicated DPbn process functions with inverseprocess_UPCS
    #and does not set 'inplace_option'
    process_dict.update({'DPb2' : {'dualprocess' : self.process_binary_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_binary_class, \
                                  'inverseprocess' : self.inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'DPbn'}})
    process_dict.update({'DPbn' : {'dualprocess' : self.process_DPbn_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPbn_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'DPbn'}})
    #'DPo4', 'DPod', 'DPo5', 'DPo2', 'DPoh', 'DP10', 'DPo6', 'DPo3' entries:
    #all use NArowtype 'justNaN', MLinfilltype 'singlct' and 'info_retention' True
    #two function sets are in play:
    # - 'DPo4', 'DPo5', 'DPo6' register the ord3 function set (inverseprocess_ord3)
    #   and set 'inplace_option'
    # - 'DPod', 'DPo2', 'DPoh', 'DP10', 'DPo3' register the DPod function set
    #   (inverseprocess_UPCS) and do not set 'inplace_option'
    #the entries otherwise differ only in labelctgy: 'DPod', 'onht' or '1010'
    process_dict.update({'DPo4' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'DPod'}})
    process_dict.update({'DPod' : {'dualprocess' : self.process_DPod_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPod_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'DPod'}})
    process_dict.update({'DPo5' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DPo2' : {'dualprocess' : self.process_DPod_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPod_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DPoh' : {'dualprocess' : self.process_DPod_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPod_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DP10' : {'dualprocess' : self.process_DPod_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPod_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'DPo6' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'DPo3' : {'dualprocess' : self.process_DPod_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_DPod_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    #'NArw', 'NAr2', 'NAr3', 'NAr4', 'NAr5' entries: all five register
    #process_NArw_class as a singleprocess function (dualprocess and postprocess are None)
    #with MLinfilltype 'boolexclude' and labelctgy 'NArw'
    #the entries differ only in NArowtype:
    #'justNaN', 'numeric', 'positivenumeric', 'nonnegativenumeric', 'integer'
    #none of these entries define 'inverseprocess' or 'info_retention'
    process_dict.update({'NArw' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_NArw_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr2' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_NArw_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr3' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_NArw_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr4' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_NArw_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'nonnegativenumeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr5' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_NArw_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    #'null' and 'copy' entries: both are singleprocess categories with
    #NArowtype 'exclude' and MLinfilltype 'exclude'
    #'null' registers process_null_class, has labelctgy None and no inverseprocess
    #'copy' registers process_copy_class and uses inverseprocess_excl for inversion
    process_dict.update({'null' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_null_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : None}})
    process_dict.update({'copy' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_copy_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'copy'}})
    #'excl', 'exc2', 'exc3', 'exc4', 'exc5', 'exc6' entries, all with 'inplace_option' True
    #'excl' and 'exc6' have identical values in this dictionary: singleprocess
    #process_excl_class, inverseprocess_excl, NArowtype 'exclude',
    #MLinfilltype 'totalexclude', labelctgy 'excl'
    #'exc2', 'exc3', 'exc4' have identical values in this dictionary: the exc2
    #dualprocess / postprocess pair, inverseprocess_UPCS, NArowtype 'numeric',
    #MLinfilltype 'numeric', labelctgy 'exc2'
    #'exc5' uses dedicated exc5 process functions with NArowtype 'integer'
    #and MLinfilltype 'singlct'
    #NOTE(review): entries that are identical here presumably differ in how they are
    #used elsewhere (e.g. in the transform dictionary) - confirm
    process_dict.update({'excl' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_excl_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'totalexclude', \
                                  'labelctgy' : 'excl'}})
    process_dict.update({'exc2' : {'dualprocess' : self.process_exc2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_exc2_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc3' : {'dualprocess' : self.process_exc2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_exc2_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc4' : {'dualprocess' : self.process_exc2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_exc2_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc5' : {'dualprocess' : self.process_exc5_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_exc5_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'exc5'}})
    process_dict.update({'exc6' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_excl_class, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self.inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'totalexclude', \
                                  'labelctgy' : 'excl'}})
    #'shfl' entry: singleprocess category registering process_shfl_class
    #with 'inplace_option' True, NArowtype 'exclude' and MLinfilltype 'exclude'
    #this entry defines no 'inverseprocess' or 'info_retention' keys
    process_dict.update({'shfl' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_shfl_class, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'shfl'}})
    #'nmbd', '101d', 'ordd', 'texd', 'bnrd', 'datd', 'nuld' entries:
    #each registers an existing function set under an alternate key, with labelctgy
    #pointing at another category ('nmbr', '1010', 'ord3', 'text', 'bnry', 'mdsn', None)
    # - 'nmbd' : numerical function set, inverseprocess_nmbr
    # - '101d' : 1010 function set, MLinfilltype '1010'
    # - 'ordd' : ord3 function set, MLinfilltype 'singlct'
    # - 'texd' : text function set, MLinfilltype 'multirt'
    # - 'bnrd' : binary function set, MLinfilltype 'binary'
    # - 'datd' : dualprocess, singleprocess and postprocess are all None
    # - 'nuld' : process_null_class as singleprocess, labelctgy None
    #NOTE(review): 'datd' registers no processing function at all - presumably it only
    #serves as a root category whose family tree supplies the transforms; confirm
    #NOTE(review): 'bnrd' does not set 'inplace_option' although the 'DPb2' entry
    #built on the same binary functions does - confirm whether deliberate
    process_dict.update({'nmbd' : {'dualprocess' : self.process_numerical_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_numerical_class, \
                                  'inverseprocess' : self.inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'101d' : {'dualprocess' : self.process_1010_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_1010_class, \
                                  'inverseprocess' : self.inverseprocess_1010, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'ordd' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'texd' : {'dualprocess' : self.process_text_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_text_class, \
                                  'inverseprocess' : self.inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'bnrd' : {'dualprocess' : self.process_binary_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_binary_class, \
                                  'inverseprocess' : self.inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'bnry'}})
    process_dict.update({'datd' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mdsn'}})
    process_dict.update({'nuld' : {'dualprocess' : None, \
                                  'singleprocess' : self.process_null_class, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : None}})
    process_dict.update({'lbnm' : {'dualprocess' : self.process_exc2_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_exc2_class, \
                                  'inverseprocess' : self.inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'lb10' : {'dualprocess' : self.process_text_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_text_class, \
                                  'inverseprocess' : self.inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'lbor' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'lbo5' : {'dualprocess' : self.process_ordl_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ordl_class, \
                                  'inverseprocess' : self.inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'lbos' : {'dualprocess' : self.process_ord3_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_ord3_class, \
                                  'inverseprocess' : self.inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'lbte' : {'dualprocess' : self.process_text_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_text_class, \
                                  'inverseprocess' : self.inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'lbbn' : {'dualprocess' : self.process_binary_class, \
                                  'singleprocess' : None, \
                                  'postprocess' : self.postprocess_binary_class, \
                                  'inverseprocess' : self.inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'lbda' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mdsn'}})

    return process_dict

  def processfamily(self, df_train, df_test, column, category, origcategory, process_dict, \
                    transform_dict, postprocess_dict, assign_param):
    '''
    #master processing function applied to a source column, which walks through
    #the family tree primitives populated for category in the transform_dict
    #(by assembletransformdict) and applies each of the entries found there
    
    #primitives are processed in order of
    #siblings, cousins, parents, auntsuncles
    #siblings and parents are passed to processparent,
    #cousins and auntsuncles are passed to processcousin
    
    #after processing, if the last applied transform reported an inplace operation
    #the source column is marked as 'inplace' (when it has a column_dict entry),
    #otherwise if replacement primitives (parents / auntsuncles) were populated
    #the source column is marked for deletion, either in its column_dict entry
    #or by logging in postprocess_dict['orig_noinplace']
    
    #returns df_train, df_test, postprocess_dict
    '''
    
    family = transform_dict[category]
    
    #the last entry of the replacement primitives is the one elligible for inplace,
    #taken from auntsuncles when populated and otherwise from parents
    #(remains False when neither is populated)
    final_upstream = False
    if len(family['auntsuncles']) > 0:
      final_upstream = family['auntsuncles'][-1]
    elif len(family['parents']) > 0:
      final_upstream = family['parents'][-1]
    
    #each primitive is paired with the function that processes its entries
    #siblings: with downstream, supplemental
    #cousins: no downstream, supplemental
    #parents: with downstream, with replacement
    #auntsuncles: no downstream, with replacement
    processing_order = (('siblings', self.processparent), \
                        ('cousins', self.processcousin), \
                        ('parents', self.processparent), \
                        ('auntsuncles', self.processcousin))
    
    #this gets overwritten by each applied transform, so the returned value
    #of the last applied transform is what is inspected below
    inplaceperformed = False
    
    for primitive, process_entry in processing_order:
      for entry in family[primitive]:
        
        #None entries are skipped
        #(kind of a placeholder such as for validation of primitive entry)
        if entry is None:
          continue
        
        df_train, df_test, postprocess_dict, inplaceperformed = \
        process_entry(df_train, df_test, column, entry, origcategory, final_upstream, \
                      process_dict, transform_dict, postprocess_dict, assign_param)
    
    #(circle of life)
    if inplaceperformed is True:
      #source column was replaced as part of an inplace operation
      if column in postprocess_dict['column_dict']:
        postprocess_dict['column_dict'][column]['deletecolumn'] = 'inplace'
    
    elif len(family['auntsuncles']) + len(family['parents']) > 0 \
    and inplaceperformed is False:
      #replacement transformations were performed so mark column for deletion
      if column in postprocess_dict['column_dict']:
        #here we'll only address downstream generations
        postprocess_dict['column_dict'][column]['deletecolumn'] = True
      elif column not in postprocess_dict['orig_noinplace']:
        postprocess_dict['orig_noinplace'].append(column)
    
    return df_train, df_test, postprocess_dict

  def circleoflife(self, df_train, df_test, column, category, origcategory, process_dict, \
                    transform_dict, postprocess_dict, templist1):
    '''
    #This function deletes source columns for cases where family primitives 
    #included replacement, with maintenance of the associated data structures.
    
    #column is the source column that was passed to processfamily
    #templist1 is the list of df_train columns before processfamily
    
    #three steps are performed:
    #1) the source column is deleted from both dataframes if the family tree for
    #category has replacement primitives (auntsuncles / parents) and the column
    #was logged in postprocess_dict['orig_noinplace']
    #2) derived columns with deletecolumn marked as 'inplace' are stripped
    #from the columnslist entries of the column_dict
    #3) newly derived columns with deletecolumn marked as True are stripped from 
    #the columnslist entries of the new columns and deleted from both dataframes
    
    #returns df_train, df_test, postprocess_dict
    '''

    family = transform_dict[category]

    #replacement performed on first generation, so delete the original column
    if len(family['auntsuncles']) + len(family['parents']) > 0 \
    and column in postprocess_dict['orig_noinplace']:
      del df_train[column]
      del df_test[column]

    #columns that were derived since templist1 was recorded
    newcolumns = set(df_train) - set(templist1)

    #without any new columns there is nothing further to maintain
    if len(newcolumns) == 0:
      return df_train, df_test, postprocess_dict

    column_dict = postprocess_dict['column_dict']

    #this one is for columns replaced as part of inplace operation
    #the columnslist of one of the new columns is used as the set of 
    #columns to inspect (copied since the lists are edited in the loop)
    anewcolumn = next(iter(newcolumns))
    inspected_columns = list(column_dict[anewcolumn]['columnslist'])

    for inspected in inspected_columns:
      if column_dict[inspected]['deletecolumn'] == 'inplace':
        for relative in inspected_columns:
          relative_columnslist = column_dict[relative]['columnslist']
          #membership is rechecked for each since a list may already 
          #have had the entry removed
          if inspected in relative_columnslist:
            relative_columnslist.remove(inspected)

    #this one is for columns we manually delete
    for newcolumn in newcolumns:

      if column_dict[newcolumn]['deletecolumn'] is not True:
        continue

      for relative in newcolumns:
        relative_columnslist = column_dict[relative]['columnslist']
        if newcolumn in relative_columnslist:
          relative_columnslist.remove(newcolumn)

      #now we'll delete column
      #note this only works on single column parents, need to incorporate categorylist
      #for multicolumn parents (future extension)
      if newcolumn in df_train.columns:
        del df_train[newcolumn]
        del df_test[newcolumn]

    return df_train, df_test, postprocess_dict

  def dictupdate(self, column, column_dict, postprocess_dict):
    '''
    #dictupdate function takes as input a column_dict returned from a processing
    #function and merges it into postprocess_dict['column_dict']
    
    #column is the column name prior to the application of processing, 
    #the names of the columns returned from processing are the keys of column_dict
    
    #when column already has an entry in postprocess_dict['column_dict'] whose
    #origcolumn differs from column (i.e. column was itself derived), the 
    #origcolumn and origcategory of that entry are carried through to the new entries
    
    #then for each previously recorded entry sharing the same origcolumn, 
    #the columnslist of the two are combined and the combined list is 
    #assigned to both (as the same list object)
    
    #returns the postprocess_dict
    '''

    recorded = postprocess_dict['column_dict']

    for new_entry in column_dict.values():

      #first address carry-though of origcolumn and origcategory from parent to child
      source_entry = recorded.get(column)
      if source_entry is not None and source_entry['origcolumn'] != column:
        new_entry['origcolumn'] = source_entry['origcolumn']
        new_entry['origcategory'] = source_entry['origcategory']

      #now combine columnslist for all entries originating from same origcolumn
      for prior_entry in recorded.values():
        if prior_entry['origcolumn'] == new_entry['origcolumn']:
          combined = list(set(prior_entry['columnslist']) | set(new_entry['columnslist']))
          prior_entry['columnslist'] = combined
          #apply that value to the column_dict columnslist as well
          new_entry['columnslist'] = combined

    #now append column_dict onto postprocess_dict
    recorded.update(column_dict)

    return postprocess_dict

  def processcousin(self, df_train, df_test, column, cousin, origcategory, final_upstream, \
                     process_dict, transform_dict, postprocess_dict, assign_param):
    '''
    #cousin is one of the primitives for processfamily function, and it involves
    #transformations without downstream derivations without replacement of source column
    #this same function is also used for the auntsuncles primitive (where the caller
    #follows with a deletion of original column), and for the friends and coworkers
    #primitives downstream of a parent, since none of these have offspring
    #note the processing functions are accessed through the process_dict

    #cousin is the category key into process_dict for the transform to apply
    #final_upstream is the family tree entry elligible for an inplace operation
    #(or False when there is none), inplace is only activated when it matches cousin
    #and the process_dict entry has 'inplace_option' set to True

    #reminder the format of assign_param is e.g.
    #assignparam = {'splt' : {'column1' : {'minsplit' : 4}},
    #               'spl2' : {'column2' : {'minsplit' : 3}}}

    #returns df_train, df_test, postprocess_dict, inplaceperformed
    #where inplaceperformed is True when the transform was directed to operate inplace
    
    #raises ValueError when the process_dict entry for cousin has neither a 
    #dualprocess nor a singleprocess function populated
    '''

    transform_entry = process_dict[cousin]

    params = self.grab_params(assign_param, cousin, column, transform_entry, postprocess_dict)

    #a populated processing function is either a method of this class or a custom 
    #externally defined transformation function, anything else (e.g. None) 
    #is treated as not populated
    function_types = (types.MethodType, types.FunctionType)

    #dualprocess takes precedence when both are populated
    if isinstance(transform_entry.get('dualprocess'), function_types):
      process_type = 'dualprocess'
    elif isinstance(transform_entry.get('singleprocess'), function_types):
      process_type = 'singleprocess'
    else:
      #without this check the method would halt further below with an 
      #unassigned column_dict_list, which gave no hint as to the cause
      raise ValueError("process_dict entry for category '" + str(cousin) \
                       + "' has no dualprocess or singleprocess function populated, " \
                       + "so it cannot be applied as a family tree primitive entry")

    #resolve whether the transform is to be applied inplace
    inplaceperformed = False
    if final_upstream == cousin:
      #inplace is applied when supported by the transform, 
      #unless user passed inplace as False in assigned parameters
      #(the != False comparison is retained so that e.g. 0 also deactivates)
      if transform_entry.get('inplace_option') is True \
      and ('inplace' not in params or params['inplace'] != False):
        inplaceperformed = True
        params.update({'inplace' : True})
    elif 'inplace' in params and params['inplace'] is True:
      #user cannot manually specify inplace by design
      params.update({'inplace' : False})

    if process_type == 'dualprocess':

      #dual process functions are fit to train and applied to test in one call
      df_train, df_test, column_dict_list = \
      transform_entry['dualprocess'](df_train, df_test, column, origcategory, \
                                     postprocess_dict, params)

    else:

      #single process functions process train and test separately
      #(the column_dict_list returned for test is not used)
      df_train, column_dict_list = \
      transform_entry['singleprocess'](df_train, column, origcategory, \
                                       postprocess_dict, params)

      df_test, _1 = \
      transform_entry['singleprocess'](df_test, column, origcategory, \
                                       postprocess_dict, params)

    #update the columnslist and normalization_dict for both column_dict and postprocess_dict
    for column_dict in column_dict_list:
      postprocess_dict = self.dictupdate(column, column_dict, postprocess_dict)

    return df_train, df_test, postprocess_dict, inplaceperformed

  def processparent(self, df_train, df_test, column, parent, origcategory, final_upstream, \
                    process_dict, transform_dict, postprocess_dict, assign_param):
    '''
    #parent is one of the primitives for processfamily function, and it involves
    #transformations with downstream derivations with replacement of source column
    #this same function is also used for the siblings primitive (where the caller
    #does not follow with a deletion of original column), and for the niecesnephews 
    #and children primitives downstream of a parent (using recursion), allowing
    #the children to have children of their own, you know, grandchildren and stuff.
    #note the processing functions are accessed through the process_dict

    #parent is the category key into process_dict for the upstream transform and 
    #into transform_dict for the downstream primitives applied to its returned column
    #final_upstream is the family tree entry elligible for an inplace operation
    #(or False when there is none), inplace is only activated when it matches parent
    #and the process_dict entry has 'inplace_option' set to True

    #reminder the format of assign_param is e.g.
    #assignparam = {'splt' : {'column1' : {'minsplit' : 4}},
    #               'spl2' : {'column2' : {'minsplit' : 3}}}

    #we apply in order of
    #upstream process, niecesnephews, friends, children, coworkers

    #returns df_train, df_test, postprocess_dict, inplaceperformed
    #where inplaceperformed is True when the upstream transform was directed 
    #to operate inplace (the inplace status of downstream transforms is instead 
    #recorded in the deletecolumn entry for the column returned from upstream)
    
    #raises ValueError when the process_dict entry for parent has neither a 
    #dualprocess nor a singleprocess function populated
    '''

    #__________
    #upstream process

    transform_entry = process_dict[parent]

    params = self.grab_params(assign_param, parent, column, transform_entry, postprocess_dict)

    #a populated processing function is either a method of this class or a custom 
    #externally defined transformation function, anything else (e.g. None) 
    #is treated as not populated
    function_types = (types.MethodType, types.FunctionType)

    #dualprocess takes precedence when both are populated
    if isinstance(transform_entry.get('dualprocess'), function_types):
      process_type = 'dualprocess'
    elif isinstance(transform_entry.get('singleprocess'), function_types):
      process_type = 'singleprocess'
    else:
      #without this check the method would halt further below with an 
      #unassigned column_dict_list, which gave no hint as to the cause
      raise ValueError("process_dict entry for category '" + str(parent) \
                       + "' has no dualprocess or singleprocess function populated, " \
                       + "so it cannot be applied as a family tree primitive entry")

    #resolve whether the upstream transform is to be applied inplace
    inplaceperformed = False
    if final_upstream == parent:
      #inplace is applied when supported by the transform, 
      #unless user passed inplace as False in assigned parameters
      #(the != False comparison is retained so that e.g. 0 also deactivates)
      if transform_entry.get('inplace_option') is True \
      and ('inplace' not in params or params['inplace'] != False):
        inplaceperformed = True
        params.update({'inplace' : True})
    elif 'inplace' in params and params['inplace'] is True:
      #user cannot manually specify inplace by design
      params.update({'inplace' : False})

    if process_type == 'dualprocess':

      #dual process functions are fit to train and applied to test in one call
      df_train, df_test, column_dict_list = \
      transform_entry['dualprocess'](df_train, df_test, column, origcategory, \
                                     postprocess_dict, params)

    else:

      #single process functions process train and test separately
      #(the column_dict_list returned for test is not used)
      df_train, column_dict_list = \
      transform_entry['singleprocess'](df_train, column, origcategory, \
                                       postprocess_dict, params)

      df_test, _1 = \
      transform_entry['singleprocess'](df_test, column, origcategory, \
                                       postprocess_dict, params)

    #update the columnslist and normalization_dict for both column_dict and postprocess_dict
    for column_dict in column_dict_list:
      postprocess_dict = self.dictupdate(column, column_dict, postprocess_dict)

      #note this only works for single column source, as currently implemented
      #multicolumn transforms (such as text or bins) cannot serve as parents
      #a future extension may check the categorylist from column_dict for 
      #purposes of transforms applied to multicolumn source
      #NOTE(review): parentcolumn is only assigned when the upstream transform
      #returned at least one column_dict, and is required below whenever 
      #downstream primitives are populated
      parentcolumn = list(column_dict.keys())[0]

    #__________
    #downstream processes, applied to the column returned from upstream

    offspring = transform_dict[parent]

    #the last entry of the downstream replacement primitives is the one elligible 
    #for inplace, taken from coworkers when populated and otherwise from children
    #(remains False when neither is populated)
    final_downstream = False
    if len(offspring['coworkers']) > 0:
      final_downstream = offspring['coworkers'][-1]
    elif len(offspring['children']) > 0:
      final_downstream = offspring['children'][-1]

    #each primitive is paired with the function that processes its entries
    #niecesnephews and children may have offspring of their own (recursion)
    #friends and coworkers do not
    processing_order = (('niecesnephews', self.processparent), \
                        ('friends', self.processcousin), \
                        ('children', self.processparent), \
                        ('coworkers', self.processcousin))

    #initialize in case no downstream performed
    #this gets overwritten by each applied transform, so the returned value
    #of the last applied transform is what is inspected below
    parent_inplaceperformed = False

    for primitive, process_entry in processing_order:
      for entry in offspring[primitive]:

        #None entries are skipped
        if entry is None:
          continue

        df_train, df_test, postprocess_dict, parent_inplaceperformed = \
        process_entry(df_train, df_test, parentcolumn, entry, origcategory, final_downstream, \
                      process_dict, transform_dict, postprocess_dict, assign_param)

    #(circle of life)
    if parent_inplaceperformed is True:
      #column returned from upstream was replaced as part of an inplace operation
      if parentcolumn in postprocess_dict['column_dict']:
        postprocess_dict['column_dict'][parentcolumn]['deletecolumn'] = 'inplace'

    elif len(offspring['children']) + len(offspring['coworkers']) > 0 \
    and parent_inplaceperformed is False:
      #replacement transformations were performed so mark column for deletion
      if parentcolumn in postprocess_dict['column_dict']:
        #here we'll only address downstream generations
        postprocess_dict['column_dict'][parentcolumn]['deletecolumn'] = True
      elif parentcolumn not in postprocess_dict['orig_noinplace']:
        postprocess_dict['orig_noinplace'].append(parentcolumn)

    return df_train, df_test, postprocess_dict, inplaceperformed

  def df_copy_train(self, df_train, column, newcolumn, suffixoverlap_results = None):
    """
    #performs a copy operation to add column to a df_train
    #Before any new columns added to df_train
    #checks that they are not already present in df_train
    #if so prints a warning message and skips the copy
    
    #df_train is the dataframe receiving the new column
    #column is the header of the column to copy
    #newcolumn is the header for the copy
    #suffixoverlap_results is an optional dictionary to log the result to,
    #when not passed a new dictionary is initialized for each call
    #(previously the default was a dictionary shared between all calls, 
    #such that results leaked between unrelated calls)
    
    #returns df_train, suffixoverlap_results
    #where suffixoverlap_results has the entry {newcolumn : True} logged
    #in case of overlap, or otherwise {newcolumn : False}
    """
    
    if suffixoverlap_results is None:
      suffixoverlap_results = {}
    
    #test for overlap error
    overlap = newcolumn in df_train.columns
    
    if overlap:
      
      print("*****************")
      print("Warning of suffix overlap error")
      print("When creating new column: ", newcolumn)
      print("The column was already found present in df_train headers.")
      print("")
      print("Some potential quick fixes for this error include:")
      print("- rename columns to integers before passing to automunge(.)")
      print("- strip underscores '_' from column header titles.")
      print("(convention is all suffix appenders include an underscore)")
      print("")
      print("Please note any updates to column headers will need to be carried through to assignment parameters.")
      print("*****************")
      print("")
      
    else:
      
      #the copy is only performed when it won't overwrite an existing column
      df_train[newcolumn] = df_train[column].copy()
    
    suffixoverlap_results.update({newcolumn : overlap})
    
    return df_train, suffixoverlap_results

  def df_check_suffixoverlap(self, df_train, newcolumns, suffixoverlap_results = None):
    """
    #checks that newcolumns list are not already present in df_train
    #prints a warning message for any that are
    #logs in suffixoverlap_results
    
    #df_train is the dataframe whose headers are inspected
    #newcolumns is a list of column headers, or a single column header
    #suffixoverlap_results is an optional dictionary to log the results to,
    #when not passed a new dictionary is initialized for each call
    #(previously the default was a dictionary shared between all calls, 
    #such that results leaked between unrelated calls)
    
    #returns suffixoverlap_results
    #with an entry logged for each of newcolumns of {newcolumn : True} 
    #in case of overlap, or otherwise {newcolumn : False}
    """
    
    if suffixoverlap_results is None:
      suffixoverlap_results = {}
    
    #a single column header is accepted in place of a list
    if not isinstance(newcolumns, list):
      newcolumns = [newcolumns]
    
    for newcolumn in newcolumns:
      
      overlap = newcolumn in df_train.columns
      
      if overlap:
        
        print("*****************")
        print("Warning of suffix overlap error")
        print("When creating new column: ", newcolumn)
        print("The column was already found present in df_train headers.")
        print("")
        print("Some potential quick fixes for this error include:")
        print("- rename columns to integers before passing to automunge(.)")
        print("- strip underscores '_' from column header titles.")
        print("(convention is all suffix appenders include an underscore)")
        print("")
        print("Please note any updates to column headers will need to be carried through to assignment parameters.")
        print("*****************")
        print("")

      suffixoverlap_results.update({newcolumn : overlap})
        
    return suffixoverlap_results

  def suffix_overlap_final_aggregation_and_printouts(self, postprocess_dict):
    """
    #Performs a final round of printouts in case of identified suffix overlap error
    #Also aggregates the validation results stored in each column_dict entry
    #into those returned in postprocess_dict['miscparameters_results']['suffixoverlap_results']
    
    #printouts are also repeated for overlaps logged in the miscparameters_results
    #entries for PCA_suffixoverlap_results, Binary_suffixoverlap_results, 
    #and excl_suffixoverlap_results
    
    #returns postprocess_dict
    """
    
    #then at completion of automunge(.), aggregate the suffixoverlap results
    #and do an additional printout if any column overlap error to be sure user sees message
    for column_entry in postprocess_dict['column_dict'].values():
      
      column_results = column_entry['suffixoverlap_results']
      
      for newcolumn, overlap in column_results.items():
        
        if overlap is not True:
          continue
          
        print("*****************")
        print("Warning of suffix overlap error")
        print("When creating new column: ", newcolumn)
        print("The column was already found present in df_train headers.")
        print("")
        print("Some potential quick fixes for this error include:")
        print("- rename columns to integers before passing to automunge(.)")
        print("- strip underscores '_' from column header titles.")
        print("(convention is all suffix appenders include an underscore)")
        print("")
        print("Please note any updates to column headers will need to be carried through to assignment parameters.")
        print("*****************")
        print("")
      
      postprocess_dict['miscparameters_results']['suffixoverlap_results'].update(column_results)

    misc_results = postprocess_dict['miscparameters_results']

    #overlaps identified for columns returned from PCA
    for newcolumn, overlap in misc_results['PCA_suffixoverlap_results'].items():
      if overlap is True:

        print("*****************")
        print("Warning of suffix overlap error")
        print("When creating PCA column: ", newcolumn)
        print("The column was already found present in df_train headers.")
        print("")
        print("Note that PCA returned columns are of form: PCAcol0")
        print("Where # is integer")
        print("This form of column header should be avoided in passed data.")
        print("")

    #overlaps identified for columns returned from Binary
    for newcolumn, overlap in misc_results['Binary_suffixoverlap_results'].items():
      if overlap is True:

        print("*****************")
        print("Warning of suffix overlap error")
        print("When creating Binary column: ", newcolumn)
        print("The column was already found present in df_train headers.")
        print("")
        print("Note that Binary returned columns are of form: Binary_1010_#")
        print("Where # is integer")
        print("This error might have occured if you passed data including column header 'Binary' to '1010' transform")
        print("This form of column header should be avoided in passed data.")
        print("")

    #overlaps identified from removal of the '_excl' suffix
    for newcolumn, overlap in misc_results['excl_suffixoverlap_results'].items():
      if overlap is True:

        print("*****************")
        print("Warning of suffix overlap error")
        print("When removing '_excl' suffix for column: ", newcolumn)
        print("The column without suffix was already found present in df_train headers.")
        print("")
          
    return postprocess_dict

  def process_NArw_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform (applied to one dataframe at a time) that appends
    #a marker column named column + '_NArw'
    #the marker values are whatever getNArows returns for the source column
    #(which takes the category into account), recast to 8-bit integers
    #the source column itself is left in place
    #params is accepted for interface consistency, no entries are inspected here
    #returns the dataframe and a list of column_dict entries describing the new column
    '''
    
    NArw_column = column + '_NArw'
    
    #log whether the new header collides with one already present in df
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, NArw_column, {})

    #populate the marker column
    df[NArw_column] = self.getNArows(df, column, category, postprocess_dict)

    #recast to int8 (1 byte per entry) for memory savings
    df[NArw_column] = df[NArw_column].astype(np.int8)

    #share of marked rows, recorded for the drift report
    marker_series = df[NArw_column]
    pct_NArw = marker_series.sum() / marker_series.shape[0]

    #headers returned by this transform
    returned_columns = [NArw_column]

    #normalization dictionary, keyed by the returned header
    normalization_dict = {NArw_column : {'pct_NArw' : pct_NArw}}

    #one column_dict entry per returned header, for use later in ML infill methods
    column_dict_list = []
    
    for returned_column in returned_columns:

      properties = dict(category = 'NArw',
                        origcategory = category,
                        normalization_dict = normalization_dict,
                        origcolumn = column,
                        inputcolumn = column,
                        columnslist = returned_columns,
                        categorylist = [returned_column],
                        infillmodel = False,
                        infillcomplete = False,
                        suffixoverlap_results = suffixoverlap_results,
                        deletecolumn = False)

      column_dict_list.append({returned_column : properties})

    return df, column_dict_list

  def process_numerical_class(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_numerical_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #z-score normalization: (x - mean) / std * multiplier + offset
    #where mean and std are derived from the training data and applied to both sets
    #note this is a "dualprocess" function since is applied to both dataframes
    #
    #takes as arguments pandas dataframes of training and test data (mdf_train), (mdf_test)
    #the name of the source column (column) and the parent category (category)
    #postprocess_dict is accepted for interface consistency and not inspected here
    #
    #params entries inspected (all optional):
    #'inplace'    - True to rename the source column instead of copying it, default False
    #'offset'     - constant added after scaling, default 0
    #'multiplier' - factor applied after division by std, default 1
    #'cap'        - True to cap at training max, a number to cap at that value, False for no cap
    #'floor'      - True to floor at training min, a number to floor at that value, False for no floor
    #'adjinfill'  - True to infill from adjacent cells ahead of the mean infill, default False
    #
    #entries that can't be parsed as numeric are treated as missing
    #and (after any adjinfill) replaced with the training set mean
    #
    #returns mdf_train, mdf_test with column named column + '_nmbr', and column_dict_list
    #expect this approach works better when the numerical distribution is thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    
    #offset is just an added constant applied after multiplier
    offset = params.get('offset', 0)
    
    #multiplier scales the set by multiplication prior to offset
    multiplier = params.get('multiplier', 1)
    
    #cap / floor are applied prior to normalization
    cap = params.get('cap', False)
    floor = params.get('floor', False)
      
    #adjinfill changes default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)

    nmbr_column = column + '_nmbr'

    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, nmbr_column, suffixoverlap_results)

      mdf_test[nmbr_column] = mdf_test[column].copy()
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, nmbr_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : nmbr_column}, inplace = True)
      mdf_test.rename(columns = {column : nmbr_column}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[nmbr_column] = pd.to_numeric(mdf_train[nmbr_column], errors='coerce')
    mdf_test[nmbr_column] = pd.to_numeric(mdf_test[nmbr_column], errors='coerce')
    
    #a few more metrics collected for driftreport
    maximum = mdf_train[nmbr_column].max()
    minimum = mdf_train[nmbr_column].min()
    
    #when cap / floor passed as a specific value the recorded max / min
    #reflect the bound where it is tighter than the training data
    #(identity comparisons so that a numeric 0 or 1 isn't mistaken for a boolean)
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap / floor passed as True are based on max / min of the training data
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values > cap with cap in both train and test
      mdf_train.loc[mdf_train[nmbr_column] > cap, nmbr_column] = cap
      mdf_test.loc[mdf_test[nmbr_column] > cap, nmbr_column] = cap
    
    if floor is not False:
      #replace values < floor with floor in both train and test
      mdf_train.loc[mdf_train[nmbr_column] < floor, nmbr_column] = floor
      mdf_test.loc[mdf_test[nmbr_column] < floor, nmbr_column] = floor

    #get mean of training data (after cap / floor, before infill)
    #mean != mean is True only for NaN, such as when no entries were numeric
    mean = mdf_train[nmbr_column].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #forward fill then backward fill so a leading missing entry is also covered
      #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
      mdf_train[nmbr_column] = mdf_train[nmbr_column].ffill().bfill()
      mdf_test[nmbr_column] = mdf_test[nmbr_column].ffill().bfill()

    #replace any remaining missing data with training set mean
    mdf_train[nmbr_column] = mdf_train[nmbr_column].fillna(mean)
    mdf_test[nmbr_column] = mdf_test[nmbr_column].fillna(mean)

    #subtract mean from column for both train and test
    mdf_train[nmbr_column] = mdf_train[nmbr_column] - mean
    mdf_test[nmbr_column] = mdf_test[nmbr_column] - mean

    #get standard deviation of training data
    std = mdf_train[nmbr_column].std()
    
    #special cases, standard deviation of 0 would be a division by 0
    #and a NaN standard deviation (such as returned for a single row training set)
    #would turn every entry into NaN, so in both cases we'll set it to 1
    if std == 0 or std != std:
      std = 1

    #divide column values by std for both training and test data
    #offset, multiplier are parameters that default to zero, one
    mdf_train[nmbr_column] = mdf_train[nmbr_column] / std * multiplier + offset
    mdf_test[nmbr_column] = mdf_test[nmbr_column] / std * multiplier + offset

    #create list of columns
    nmbrcolumns = [nmbr_column]

    #values needed to apply a consistent transform to subsequent data
    nmbrnormalization_dict = {nmbr_column : {'mean' : mean, 'std' : std, \
                                             'max' : maximum, 'min' : minimum, \
                                             'offset' : offset, 'multiplier': multiplier, \
                                             'cap' : cap, 'floor' : floor, \
                                             'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'nmbr', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_dxdt_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_dxdt_class(df, column, category, postprocess_dict, params)
    #function to translate a continuous variable into a bounded variable
    #by taking delta of each row from the row 'periods' steps before it
    #assumes the rows are not shuffled and represent a continuous function 
    #with consistent time steps
    #
    #params entries inspected (all optional):
    #'inplace' - True to rename the source column instead of copying it, default False
    #'periods' - number of rows between the entries being differenced, default 1
    #
    #for missing values, uses adjacent cell infill as default
    #if nothing in the column could be parsed as numeric the result is all zeros
    #
    #returns df with column named column + '_dxdt', and column_dict_list
    #postprocess_dict is accepted for interface consistency and not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    periods = params.get('periods', 1)

    dxdt_column = column + '_dxdt'
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, dxdt_column, suffixoverlap_results)
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, dxdt_column, suffixoverlap_results)
      
      df.rename(columns = {column : dxdt_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[dxdt_column] = pd.to_numeric(df[dxdt_column], errors='coerce')
    
    #replace missing entries with value from adjacent cell in preceding row
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[dxdt_column] = df[dxdt_column].ffill().bfill()
    
    #subtract preceding row
    df[dxdt_column] = df[dxdt_column] - df[dxdt_column].shift(periods = periods)
    
    #rows without a counterpart to subtract will have a nan so one more backfill
    df[dxdt_column] = df[dxdt_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #inspect first cell, if it is still nan after the backfill apply a zero infill
    #(value != value is True only for nan)
    first_value = df[dxdt_column].values[0]
    if first_value != first_value:
      df[dxdt_column] = df[dxdt_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [dxdt_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to dxdt without the driftreport metrics for postmunge efficiency
    #ratios are counted from boolean masks so no filtered dataframe copies are needed
    #note positiveratio is inclusive of zero entries
    dxdt_series = df[dxdt_column]
    row_count = dxdt_series.shape[0]
    positiveratio = int((dxdt_series >= 0).sum()) / row_count
    negativeratio = int((dxdt_series < 0).sum()) / row_count
    zeroratio = int((dxdt_series == 0).sum()) / row_count
    minimum = dxdt_series.min()
    maximum = dxdt_series.max()
    mean = dxdt_series.mean()
    std = dxdt_series.std()

    nmbrnormalization_dict = {dxdt_column : {'positiveratio' : positiveratio, \
                                             'negativeratio' : negativeratio, \
                                             'zeroratio' : zeroratio, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'periods' : periods}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'dxdt', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def process_dxd2_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_dxd2_class(df, column, category, postprocess_dict, params)
    #function to translate a continuous variable into a bounded variable
    #by summing each row with the (periods - 1) rows preceding it,
    #subtracting the same sum taken 'periods' rows earlier, and dividing by periods
    #with the default periods of 2 this is the average of last two rows minus
    #the average of the two rows before that
    #should take a little noise out of noisy data
    #assumes the rows are not shuffled and represent a continuous function 
    #with consistent time steps
    #
    #params entries inspected (all optional):
    #'inplace' - True to rename the source column instead of copying it, default False
    #'periods' - size of the windows being compared, default 2
    #
    #for missing values, uses adjacent cell infill as default
    #if nothing in the column could be parsed as numeric the result is all zeros
    #
    #note a scratch column named column + '_temp1' is written to df and deleted before returning
    #returns df with column named column + '_dxd2', and column_dict_list
    #postprocess_dict is accepted for interface consistency and not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    periods = params.get('periods', 2)

    dxd2_column = column + '_dxd2'
    temp_column = column + '_temp1'
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, dxd2_column, suffixoverlap_results)
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, dxd2_column, suffixoverlap_results)
      
      df.rename(columns = {column : dxd2_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[dxd2_column] = pd.to_numeric(df[dxd2_column], errors='coerce')
    
    #replace missing entries with value from adjacent cell in preceding row
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[dxd2_column] = df[dxd2_column].ffill().bfill()

    #the scratch column header is also logged for overlap
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, [temp_column], suffixoverlap_results)

    #scratch column accumulates the sum of each row with its (periods - 1) predecessors
    df[temp_column] = df[dxd2_column].copy()

    for lag in range(1, periods):
      df[temp_column] = df[temp_column] + df[dxd2_column].shift(periods = lag)

    #difference of adjacent window sums, scaled by the window size
    df[dxd2_column] = (df[temp_column] - df[temp_column].shift(periods = periods)) / periods
    
    #rows without a full pair of windows will have a nan so one more backfill
    df[dxd2_column] = df[dxd2_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #inspect first cell, if it is still nan after the backfill apply a zero infill
    #(value != value is True only for nan)
    first_value = df[dxd2_column].values[0]
    if first_value != first_value:
      df[dxd2_column] = df[dxd2_column].fillna(0)
    
    #scratch column no longer needed
    del df[temp_column]
    
    #create list of columns
    nmbrcolumns = [dxd2_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to dxd2 without the driftreport metrics for postmunge efficiency
    #ratios are counted from boolean masks so no filtered dataframe copies are needed
    #note positiveratio is inclusive of zero entries
    dxd2_series = df[dxd2_column]
    row_count = dxd2_series.shape[0]
    positiveratio = int((dxd2_series >= 0).sum()) / row_count
    negativeratio = int((dxd2_series < 0).sum()) / row_count
    zeroratio = int((dxd2_series == 0).sum()) / row_count
    minimum = dxd2_series.min()
    maximum = dxd2_series.max()
    mean = dxd2_series.mean()
    std = dxd2_series.std()
  
    nmbrnormalization_dict = {dxd2_column : {'positiveratio' : positiveratio, \
                                             'negativeratio' : negativeratio, \
                                             'zeroratio' : zeroratio, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'periods' : periods}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'dxd2', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def process_shft_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shft_class(df, column, category, postprocess_dict, params)
    #function to shift a sequential set forward by one or more time steps
    #such that each row receives the value from 'periods' rows earlier
    #for missing values, uses adjacent cell infill as default
    #if nothing in the column could be parsed as numeric the result is all zeros
    #
    #params entries inspected (all optional):
    #'inplace' - True to rename the source column instead of copying it, default False
    #'periods' - number of time steps to shift, defaults to one
    #'suffix'  - column suffix appender, defaults to 'shft'
    #            such as may be useful if applying this transform to the same column more than once
    #
    #returns df with column named column + '_' + suffix, and column_dict_list
    #postprocess_dict is accepted for interface consistency and not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    periods = params.get('periods', 1)
    suffix = params.get('suffix', 'shft')
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #replace missing entries with value from adjacent cell in preceding row
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[shft_column] = df[shft_column].ffill().bfill()
    
    #shift from preceding row
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #rows vacated by the shift will have a nan so one more backfill
    df[shft_column] = df[shft_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #inspect first cell, if it is still nan after the backfill apply a zero infill
    #(value != value is True only for nan)
    first_value = df[shft_column].values[0]
    if first_value != first_value:
      df[shft_column] = df[shft_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to shft without the driftreport metrics for postmunge efficiency
    #ratios are counted from boolean masks so no filtered dataframe copies are needed
    #note positiveratio is inclusive of zero entries
    shft_series = df[shft_column]
    row_count = shft_series.shape[0]
    positiveratio = int((shft_series >= 0).sum()) / row_count
    negativeratio = int((shft_series < 0).sum()) / row_count
    zeroratio = int((shft_series == 0).sum()) / row_count
    minimum = shft_series.min()
    maximum = shft_series.max()
    mean = shft_series.mean()
    std = shft_series.std()

    nmbrnormalization_dict = {shft_column : {'positiveratio' : positiveratio, \
                                             'negativeratio' : negativeratio, \
                                             'zeroratio' : zeroratio, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'periods' : periods, \
                                             'suffix' : suffix}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shft', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list
  
  def process_shf2_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shf2_class(df, column, category, postprocess_dict, params)
    #function to shift a sequential set forward by one or more time steps
    #such that each row receives the value from 'periods' rows earlier
    #comparable to process_shft_class other than defaults for periods, suffix
    #and the category recorded in the returned column_dict
    #for missing values, uses adjacent cell infill as default
    #if nothing in the column could be parsed as numeric the result is all zeros
    #
    #params entries inspected (all optional):
    #'inplace' - True to rename the source column instead of copying it, default False
    #'periods' - number of time steps to shift, defaults to two
    #'suffix'  - column suffix appender, defaults to 'shf2'
    #            such as may be useful if applying this transform to the same column more than once
    #
    #returns df with column named column + '_' + suffix, and column_dict_list
    #postprocess_dict is accepted for interface consistency and not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    periods = params.get('periods', 2)
    suffix = params.get('suffix', 'shf2')
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #replace missing entries with value from adjacent cell in preceding row
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[shft_column] = df[shft_column].ffill().bfill()
    
    #shift from preceding rows
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #rows vacated by the shift will have a nan so one more backfill
    df[shft_column] = df[shft_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #inspect first cell, if it is still nan after the backfill apply a zero infill
    #(value != value is True only for nan)
    first_value = df[shft_column].values[0]
    if first_value != first_value:
      df[shft_column] = df[shft_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to shf2 without the driftreport metrics for postmunge efficiency
    #ratios are counted from boolean masks so no filtered dataframe copies are needed
    #note positiveratio is inclusive of zero entries
    shft_series = df[shft_column]
    row_count = shft_series.shape[0]
    positiveratio = int((shft_series >= 0).sum()) / row_count
    negativeratio = int((shft_series < 0).sum()) / row_count
    zeroratio = int((shft_series == 0).sum()) / row_count
    minimum = shft_series.min()
    maximum = shft_series.max()
    mean = shft_series.mean()
    std = shft_series.std()

    nmbrnormalization_dict = {shft_column : {'positiveratio' : positiveratio, \
                                             'negativeratio' : negativeratio, \
                                             'zeroratio' : zeroratio, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'periods' : periods, \
                                             'suffix' : suffix}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shf2', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list
  
  def process_shf3_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shf3_class(df, column, category, postprocess_dict, params)
    #function to shift a sequential set forward by one or more time steps
    #such that each row receives the value from 'periods' rows earlier
    #comparable to process_shft_class other than defaults for periods, suffix
    #and the category recorded in the returned column_dict
    #for missing values, uses adjacent cell infill as default
    #if nothing in the column could be parsed as numeric the result is all zeros
    #
    #params entries inspected (all optional):
    #'inplace' - True to rename the source column instead of copying it, default False
    #'periods' - number of time steps to shift, defaults to three
    #'suffix'  - column suffix appender, defaults to 'shf3'
    #            such as may be useful if applying this transform to the same column more than once
    #
    #returns df with column named column + '_' + suffix, and column_dict_list
    #postprocess_dict is accepted for interface consistency and not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #params is only read, never written to, so the shared default dict is harmless
    inplace = params.get('inplace', False)
    periods = params.get('periods', 3)
    suffix = params.get('suffix', 'shf3')
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      #the source column is renamed instead of copied, still log any header overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #replace missing entries with value from adjacent cell in preceding row
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[shft_column] = df[shft_column].ffill().bfill()
    
    #shift from preceding rows
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #rows vacated by the shift will have a nan so one more backfill
    df[shft_column] = df[shft_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #inspect first cell, if it is still nan after the backfill apply a zero infill
    #(value != value is True only for nan)
    first_value = df[shft_column].values[0]
    if first_value != first_value:
      df[shft_column] = df[shft_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to shf3 without the driftreport metrics for postmunge efficiency
    #ratios are counted from boolean masks so no filtered dataframe copies are needed
    #note positiveratio is inclusive of zero entries
    shft_series = df[shft_column]
    row_count = shft_series.shape[0]
    positiveratio = int((shft_series >= 0).sum()) / row_count
    negativeratio = int((shft_series < 0).sum()) / row_count
    zeroratio = int((shft_series == 0).sum()) / row_count
    minimum = shft_series.min()
    maximum = shft_series.max()
    mean = shft_series.mean()
    std = shft_series.std()

    nmbrnormalization_dict = {shft_column : {'positiveratio' : positiveratio, \
                                             'negativeratio' : negativeratio, \
                                             'zeroratio' : zeroratio, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'periods' : periods, \
                                             'suffix' : suffix}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shf3', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def process_MADn_class(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_MADn_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to normalize data to mean of 0 and mean absolute deviation of 1
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)
    #and the name of the column string ('column') and parent category (category)
    #postprocess_dict is part of the shared signature and is not inspected here
    #params is a dictionary of optional parameters:
    #  'inplace'   - True renames the source column instead of copying it (default False)
    #  'adjinfill' - True fills missing cells from adjacent cells (forward fill then
    #                backward fill) before the mean infill is applied (default False)
    #entries that can't be converted to numeric are treated as missing
    #missing data is replaced with the mean of the training column
    #(or with 0 when the training column has no numeric entries)
    #returns the two dataframes with new column of name column + '_MADn'
    #and column_dict_list, a one entry list holding the column_dict for the new column
    #(its normalization_dict records mean, MAD, maximum, minimum, adjinfill from the train set)
    #note this is a "dualprocess" function since is applied to both train and test dataframes
    #expect this approach works better than z-score for when the numerical distribution isn't thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_MADn'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #a few more metrics collected for driftreport (taken prior to infill)
    maximum = mdf_train[suffixcolumn].max()
    minimum = mdf_train[suffixcolumn].min()

    #get mean of training data, NaN mean (no numeric entries) defaults to 0
    mean = mdf_train[suffixcolumn].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #ffill / bfill methods used since fillna(method=...) is deprecated in recent pandas
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill().bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill().bfill()

    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)

    #subtract mean from column for both train and test
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn] - mean
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn] - mean

    #get mean absolute deviation of training data
    #computed explicitly since Series.mad() was removed in pandas 2.0
    #the deviation is taken around the mean of the centered column, which matches Series.mad()
    centered = mdf_train[suffixcolumn]
    MAD = (centered - centered.mean()).abs().mean()
    
    #special case to avoid div by 0 (or by NaN when the train set has no rows)
    if MAD == 0 or MAD != MAD:
      MAD = 1

    #divide column values by mad for both training and test data
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn] / MAD
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn] / MAD

    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'mean' : mean,
                                              'MAD' : MAD,
                                              'maximum' : maximum,
                                              'minimum' : minimum,
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict_list.append({ nc : {'category' : 'MADn',
                                      'origcategory' : category,
                                      'normalization_dict' : nmbrnormalization_dict,
                                      'origcolumn' : column,
                                      'inputcolumn' : column,
                                      'columnslist' : nmbrcolumns,
                                      'categorylist' : nmbrcolumns,
                                      'infillmodel' : False,
                                      'infillcomplete' : False,
                                      'suffixoverlap_results' : suffixoverlap_results,
                                      'deletecolumn' : False}})
    
    return mdf_train, mdf_test, column_dict_list

  def process_MAD3_class(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_MAD3_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to normalize data by subtracting maximum and dividing by mean absolute deviation
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)
    #and the name of the column string ('column') and parent category (category)
    #postprocess_dict is part of the shared signature and is not inspected here
    #params is a dictionary of optional parameters:
    #  'inplace'   - True renames the source column instead of copying it (default False)
    #  'adjinfill' - True fills missing cells from adjacent cells (forward fill then
    #                backward fill) before the mean infill is applied (default False)
    #entries that can't be converted to numeric are treated as missing
    #missing data is replaced with the mean of the training column
    #(or with 0 when the training column has no numeric entries)
    #returns the two dataframes with new column of name column + '_MAD3'
    #and column_dict_list, a one entry list holding the column_dict for the new column
    #(its normalization_dict records mean, MAD, datamax, maximum, minimum, adjinfill)
    #note this is a "dualprocess" function since is applied to both train and test dataframes
    #expect this approach works better than z-score for when the numerical distribution isn't thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    #the use of maximum instead of mean for normalization based on comment from RWRI lectures 
    #documented in (anonymized)
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_MAD3'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #a few more metrics collected for driftreport (taken prior to infill)
    maximum = mdf_train[suffixcolumn].max()
    minimum = mdf_train[suffixcolumn].min()

    #get mean of training data, NaN mean (no numeric entries) defaults to 0
    mean = mdf_train[suffixcolumn].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #ffill / bfill methods used since fillna(method=...) is deprecated in recent pandas
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill().bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill().bfill()
    
    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)

    #get max of training data (after infill)
    datamax = mdf_train[suffixcolumn].max()
    
    #get mean absolute deviation of training data (after infill, before the shift)
    #computed explicitly since Series.mad() was removed in pandas 2.0
    infilled = mdf_train[suffixcolumn]
    MAD = (infilled - infilled.mean()).abs().mean()
    
    #special case to avoid div by 0
    if MAD == 0:
      MAD = 1
    
    #subtract max from column and divide by mad, for both train and test
    mdf_train[suffixcolumn] = (mdf_train[suffixcolumn] - datamax) / MAD
    mdf_test[suffixcolumn] = (mdf_test[suffixcolumn] - datamax) / MAD

    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'mean' : mean,
                                              'MAD' : MAD,
                                              'datamax' : datamax,
                                              'maximum' : maximum,
                                              'minimum' : minimum,
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict_list.append({ nc : {'category' : 'MAD3',
                                      'origcategory' : category,
                                      'normalization_dict' : nmbrnormalization_dict,
                                      'origcolumn' : column,
                                      'inputcolumn' : column,
                                      'columnslist' : nmbrcolumns,
                                      'categorylist' : nmbrcolumns,
                                      'infillmodel' : False,
                                      'infillcomplete' : False,
                                      'suffixoverlap_results' : suffixoverlap_results,
                                      'deletecolumn' : False}})
        
    return mdf_train, mdf_test, column_dict_list

  def process_mnmx_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mnmx_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to scale data to minimum of 0 and maximum of 1
    #based on min/max values from training set for this column
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)
    #and the name of the column string ('column') and parent category (category)
    #postprocess_dict is part of the shared signature and is not inspected here
    #params is a dictionary of optional parameters:
    #  'inplace'   - True renames the source column instead of copying it (default False)
    #  'cap'       - False for no cap, True to cap at the training set max,
    #                or a specific value given in pre-normalization units (default False)
    #  'floor'     - False for no floor, True to floor at the training set min,
    #                or a specific value given in pre-normalization units (default False)
    #  'adjinfill' - True fills missing cells from adjacent cells (forward fill then
    #                backward fill) before the mean infill is applied (default False)
    #a specific cap below the training max (or floor above the training min)
    #also replaces the max (min) used as basis of the scaling
    #entries that can't be converted to numeric are treated as missing
    #missing data is replaced with the mean of the training column
    #(or with 0 when the training column has no numeric entries)
    #returns the two dataframes with new column of name column + '_mnmx'
    #and column_dict_list, a one entry list holding the column_dict for the new column
    #note this is a "dualprocess" function since is applied to both dataframes
    #expect this approach works better when the numerical distribution is thin tailed
    #if only have training but not test data handy, use same training data for both
    #dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    #for cap and floor, False means not applied, True means based on set's found max/min in train set
    cap = params.get('cap', False)
    floor = params.get('floor', False)
      
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_mnmx'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[suffixcolumn].std()

    #get mean of training data, NaN mean (no numeric entries) defaults to 0
    mean = mdf_train[suffixcolumn].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #ffill / bfill methods used since fillna(method=...) is deprecated in recent pandas
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill().bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill().bfill()

    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)
    
    #get maximum and minimum value of training column (after infill)
    maximum = mdf_train[suffixcolumn].max()
    minimum = mdf_train[suffixcolumn].min()
    
    #a specific cap / floor inside the training range replaces the scaling basis
    #(identity checks are used so that a numeric 0 or 1 isn't mistaken for False / True)
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
    
    #avoid outlier div by zero when max = min
    maxminusmin = maximum - minimum
    if maxminusmin == 0:
      maxminusmin = 1
    
    #perform min-max scaling to train and test sets using values from train
    mdf_train[suffixcolumn] = (mdf_train[suffixcolumn] - minimum) / maxminusmin
    mdf_test[suffixcolumn] = (mdf_test[suffixcolumn] - minimum) / maxminusmin

    #cap and floor application, True resolves to the train set max / min
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
    
    if cap is not False:
      #cap is in pre-normalization units so is converted to the scaled range
      scaledcap = (cap - minimum) / maxminusmin
      
      #replace values > cap with cap
      mdf_train[suffixcolumn] = \
      mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] > scaledcap, scaledcap)
      mdf_test[suffixcolumn] = \
      mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] > scaledcap, scaledcap)
    
    if floor is not False:
      #floor is in pre-normalization units so is converted to the scaled range
      scaledfloor = (floor - minimum) / maxminusmin
      
      #replace values < floor with floor
      mdf_train[suffixcolumn] = \
      mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] < scaledfloor, scaledfloor)
      mdf_test[suffixcolumn] = \
      mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] < scaledfloor, scaledfloor)
    
    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'minimum' : minimum,
                                              'maximum' : maximum,
                                              'maxminusmin' : maxminusmin,
                                              'mean' : mean,
                                              'std' : std,
                                              'cap' : cap,
                                              'floor' : floor,
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict_list.append({ nc : {'category' : 'mnmx',
                                      'origcategory' : category,
                                      'normalization_dict' : nmbrnormalization_dict,
                                      'origcolumn' : column,
                                      'inputcolumn' : column,
                                      'columnslist' : nmbrcolumns,
                                      'categorylist' : nmbrcolumns,
                                      'infillmodel' : False,
                                      'infillcomplete' : False,
                                      'suffixoverlap_results' : suffixoverlap_results,
                                      'deletecolumn' : False}})
        
    return mdf_train, mdf_test, column_dict_list

  def process_mnm3_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mnm3_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to scale data to minimum of 0 and maximum of 1
    #after replacing extreme values above the 0.99 quantile with
    #the value of 0.99 quantile and extreme values below the 0.01
    #quantile with the value of 0.01 quantile
    #(quantiles are evaluated on the training column prior to infill)
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)
    #and the name of the column string ('column') and parent category (category)
    #postprocess_dict is part of the shared signature and is not inspected here
    #params is a dictionary of optional parameters:
    #  'inplace'   - True renames the source column instead of copying it (default False)
    #  'qmax'      - upper quantile used as the cap (default 0.99)
    #  'qmin'      - lower quantile used as the floor (default 0.01)
    #  'adjinfill' - True fills missing cells from adjacent cells (forward fill then
    #                backward fill) before the mean infill is applied (default False)
    #entries that can't be converted to numeric are treated as missing
    #missing data is replaced with the mean of the (quantile clipped) training column
    #(or with 0 when the training column has no numeric entries)
    #returns the two dataframes with new column of name column + '_mnm3'
    #and column_dict_list, a one entry list holding the column_dict for the new column
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)

    #quantiles serving as cap and floor
    qmax = params.get('qmax', 0.99)
    qmin = params.get('qmin', 0.01)
      
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_mnm3'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[suffixcolumn].std()

    #get upper quantile of training column, NaN (no numeric entries) defaults to 0
    quantilemax = mdf_train[suffixcolumn].quantile(qmax)
    if quantilemax != quantilemax:
      quantilemax = 0

    #get lower quantile of training column, NaN (no numeric entries) defaults to 0
    quantilemin = mdf_train[suffixcolumn].quantile(qmin)
    if quantilemin != quantilemin:
      quantilemin = 0

    #Series.mask is used for the replacements since the quantiles are generally floats,
    #and writing a float into an integer column by .loc assignment is deprecated in recent pandas
    #(NaN cells compare False and so are left for the infill below)
    
    #replace values > quantilemax with quantilemax
    mdf_train[suffixcolumn] = \
    mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] > quantilemax, quantilemax)
    mdf_test[suffixcolumn] = \
    mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] > quantilemax, quantilemax)
    
    #replace values < quantilemin with quantilemin
    mdf_train[suffixcolumn] = \
    mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] < quantilemin, quantilemin)
    mdf_test[suffixcolumn] = \
    mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] < quantilemin, quantilemin)

    #note the infill is performed after the quantile evaluation / replacement

    #get mean of (clipped) training data, NaN mean defaults to 0
    mean = mdf_train[suffixcolumn].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #ffill / bfill methods used since fillna(method=...) is deprecated in recent pandas
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill().bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill().bfill()
    
    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)
    
    #avoid outlier div by zero when max = min
    maxminusmin = quantilemax - quantilemin
    if maxminusmin == 0:
      maxminusmin = 1

    #perform min-max scaling to train and test sets using values from train
    mdf_train[suffixcolumn] = (mdf_train[suffixcolumn] - quantilemin) / maxminusmin
    mdf_test[suffixcolumn] = (mdf_test[suffixcolumn] - quantilemin) / maxminusmin
    
    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'quantilemin' : quantilemin,
                                              'quantilemax' : quantilemax,
                                              'maxminusmin' : maxminusmin,
                                              'mean' : mean,
                                              'std' : std,
                                              'qmax' : qmax,
                                              'qmin' : qmin,
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict_list.append({ nc : {'category' : 'mnm3',
                                      'origcategory' : category,
                                      'normalization_dict' : nmbrnormalization_dict,
                                      'origcolumn' : column,
                                      'inputcolumn' : column,
                                      'columnslist' : nmbrcolumns,
                                      'categorylist' : nmbrcolumns,
                                      'infillmodel' : False,
                                      'infillcomplete' : False,
                                      'suffixoverlap_results' : suffixoverlap_results,
                                      'deletecolumn' : False}})

    return mdf_train, mdf_test, column_dict_list
  
  def process_retn_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #process_retn_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to scale data with retention of the sign of entries, as follows
    #(max / min here are those of the training column, after any cap / floor adjustment):
    
    # if max >= 0 and min <= 0:
    #   #scaling only, recorded as scalingapproach 'retn'
    #   x = x / divisor
    
    # elif max >= 0 and min >= 0:
    #   #traditional min/max, recorded as scalingapproach 'mnmx'
    #   x = (x - min) / divisor
    
    # elif max <= 0 and min <= 0:
    #   #max/min (retains negative values), recorded as scalingapproach 'mxmn'
    #   x = (x - max) / divisor
    
    #(when a user passed cap / floor leaves max < 0 < min the 'retn' scaling is applied)
    
    #replaces missing or improperly formatted data with mean of remaining training values
    #(or with 0 when the training column has no numeric entries)
    
    #returns the two dataframes with new column of name column + '_retn'
    #and column_dict_list, a one entry list holding the column_dict for the new column
    #note this is a "dualprocess" function since is applied to both dataframes
    #postprocess_dict is part of the shared signature and is not inspected here
    
    #params is a dictionary of optional parameters:
    #  'inplace'    - True renames the source column instead of copying it (default False)
    #  'divisor'    - 'minmax' for (max - min), 'std' for standard deviation,
    #                 'mad' for mean absolute deviation (default 'minmax')
    #                 each is measured on the training column before cap / floor and infill
    #  'cap'        - False for no cap, True for the training max, or a specific value (default False)
    #  'floor'      - False for no floor, True for the training min, or a specific value (default False)
    #  'multiplier' - applied to the scaled values prior to offset (default 1)
    #  'offset'     - constant added after the multiplier (default 0)
    #  'adjinfill'  - True fills missing cells from adjacent cells (forward fill then
    #                 backward fill) before the mean infill is applied (default False)
    #where cap/floor are based on pretransform values
    #and multiplier/offset are based on posttransform values
    """
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    #accepts divisor parameters of 'minmax' 'mad' or 'std', eg divisor for normalization equation
    #note that standard deviation doesn't have same properties for sign retention when all values > or < 0
    divisor = params.get('divisor', 'minmax')
    
    #offset is just an added constant applied after multiplier
    offset = params.get('offset', 0)
    
    #multiplier scales the set by multiplication prior to offset
    multiplier = params.get('multiplier', 1)
    
    #cap / floor can be passed as True for max / min of training data
    #or as a specific value prior to normalization, False for not applied
    cap = params.get('cap', False)
    floor = params.get('floor', False)
      
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_retn'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[suffixcolumn].std()
    
    #get mean absolute deviation of training data
    #computed explicitly since Series.mad() was removed in pandas 2.0
    mad = (mdf_train[suffixcolumn] - mdf_train[suffixcolumn].mean()).abs().mean()
    
    #get maximum and minimum value of training column
    maximum = mdf_train[suffixcolumn].max()
    minimum = mdf_train[suffixcolumn].min()
    
    #avoid outlier div by zero when max = min (or NaN when no numeric entries)
    #note this range is taken before any cap / floor adjustment to maximum / minimum
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
      
    if std != std or std == 0:
      std = 1
      
    if mad != mad or mad == 0:
      mad = 1
      
    #a specific cap / floor inside the training range replaces maximum / minimum
    #(identity checks are used so that a numeric 0 or 1 isn't mistaken for False / True)
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor application, True resolves to the train set max / min
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    #Series.mask is used for the replacements since writing a float cap / floor
    #into an integer column by .loc assignment is deprecated in recent pandas
    if cap is not False:
      #replace values > cap with cap
      mdf_train[suffixcolumn] = \
      mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] > cap, cap)
      mdf_test[suffixcolumn] = \
      mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] > cap, cap)
    
    if floor is not False:
      #replace values < floor with floor
      mdf_train[suffixcolumn] = \
      mdf_train[suffixcolumn].mask(mdf_train[suffixcolumn] < floor, floor)
      mdf_test[suffixcolumn] = \
      mdf_test[suffixcolumn].mask(mdf_test[suffixcolumn] < floor, floor)
      
    #get mean of training data (after cap / floor), NaN mean defaults to 0
    mean = mdf_train[suffixcolumn].mean()
    if mean != mean:
      mean = 0
      
    if adjinfill is True:
      #ffill / bfill methods used since fillna(method=...) is deprecated in recent pandas
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill().bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill().bfill()

    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)
    
    #edge case (only neccesary so scalingapproach is assigned)
    if maximum != maximum:
      maximum = 0
    if minimum != minimum:
      minimum = 0
    
    #divisor, note an unrecognized entry is reported and then treated as 'std'
    if divisor not in ['minmax', 'std', 'mad']:
      print("Error: retn transform parameter 'divisor' only accepts entries of 'minmax' 'mad' or 'std'")
    if divisor == 'minmax':
      divisor = maxminusmin
    elif divisor == 'mad':
      divisor = mad
    else:
      divisor = std
      
    if divisor == 0 or divisor != divisor:
      divisor = 1
    
    #driftreport metric scalingapproach returned as 'retn' or 'mnmx' or 'mxmn'
    #where mnmx is for cases where all values in train set are positive
    #mxmn is for cases where all values in train set are negative
    #shift is the value subtracted prior to division
    
    if maximum >= 0 and minimum <= 0:
      shift = 0
      scalingapproach = 'retn'
      
    elif maximum >= 0 and minimum >= 0:
      shift = minimum
      scalingapproach = 'mnmx'
      
    elif maximum <= 0 and minimum <= 0:
      shift = maximum
      scalingapproach = 'mxmn'
      
    else:
      #only reachable when a user passed cap / floor leaves maximum < 0 < minimum
      #previously no branch applied and scalingapproach was left unassigned
      #the scaling without shift is applied as the fallback
      shift = 0
      scalingapproach = 'retn'
    
    #scale train and test sets using values from train
    mdf_train[suffixcolumn] = (mdf_train[suffixcolumn] - shift) / divisor * multiplier + offset
    mdf_test[suffixcolumn] = (mdf_test[suffixcolumn] - shift) / divisor * multiplier + offset
    
    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'minimum' : minimum,
                                              'maximum' : maximum,
                                              'mean' : mean,
                                              'std' : std,
                                              'mad' : mad,
                                              'scalingapproach' : scalingapproach,
                                              'offset' : offset,
                                              'multiplier': multiplier,
                                              'cap' : cap,
                                              'floor' : floor,
                                              'divisor' : divisor,
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict_list.append({ nc : {'category' : 'retn',
                                      'origcategory' : category,
                                      'normalization_dict' : nmbrnormalization_dict,
                                      'origcolumn' : column,
                                      'inputcolumn' : column,
                                      'columnslist' : nmbrcolumns,
                                      'categorylist' : nmbrcolumns,
                                      'infillmodel' : False,
                                      'infillcomplete' : False,
                                      'suffixoverlap_results' : suffixoverlap_results,
                                      'deletecolumn' : False}})
        
    return mdf_train, mdf_test, column_dict_list

  def process_mean_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_mean_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to apply mean normalization to a numeric column, i.e.
    #(x - mean) / (max - min) * multiplier + offset
    #where mean, max, and min are derived from the training set column
    #takes as arguement pandas dataframes of training and test data (mdf_train), (mdf_test)
    #the name of the column string ('column') and parent category (category)
    #postprocess_dict is not inspected by this function
    #params entries recognized (each optional):
    #  'inplace'    : True renames the source column instead of copying it (default False)
    #  'offset'     : constant added after scaling (default 0)
    #  'multiplier' : constant multiplied after scaling (default 1)
    #  'cap'        : False for no cap, True to cap at the training set max,
    #                 or a number to cap both sets at (default False)
    #  'floor'      : False for no floor, True to floor at the training set min,
    #                 or a number to floor both sets at (default False)
    #  'adjinfill'  : True to infill from adjacent cells (forward fill then backward fill)
    #                 prior to the mean infill (default False)
    #entries that can't be converted to numeric are treated as missing
    #missing entries are replaced with the training set mean, which is evaluated
    #after cap/floor application and before infill
    #returns the dataframes with new column of name column + '_mean'
    #and a column_dict_list recording the normalization parameters
    #note this is a "dualprocess" function since is applied to both dataframes
    #if only have training but not test data handy, use same training data for both
    #dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    inplace = params.get('inplace', False)
    offset = params.get('offset', 0)
    multiplier = params.get('multiplier', 1)
    cap = params.get('cap', False)
    floor = params.get('floor', False)
    adjinfill = params.get('adjinfill', False)
    
    suffixcolumn = column + '_mean'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[suffixcolumn] = pd.to_numeric(mdf_train[suffixcolumn], errors='coerce')
    mdf_test[suffixcolumn] = pd.to_numeric(mdf_test[suffixcolumn], errors='coerce')
    
    #get maximum and minimum values of training column
    maximum = mdf_train[suffixcolumn].max()
    minimum = mdf_train[suffixcolumn].min()
    
    #a numeric cap / floor inside the training range narrows the range used for scaling
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap / floor passed as True default to the training set max / min
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values > cap with cap
      mdf_train.loc[mdf_train[suffixcolumn] > cap, suffixcolumn] = cap
      mdf_test.loc[mdf_test[suffixcolumn] > cap, suffixcolumn] = cap
    
    if floor is not False:
      #replace values < floor with floor
      mdf_train.loc[mdf_train[suffixcolumn] < floor, suffixcolumn] = floor
      mdf_test.loc[mdf_test[suffixcolumn] < floor, suffixcolumn] = floor
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[suffixcolumn].std()

    #get mean of training data
    mean = mdf_train[suffixcolumn].mean()
    #mean != mean is a NaN test, e.g. for a training column with no numeric entries
    if mean != mean:
      mean = 0

    if adjinfill is True:
      #ffill / bfill methods used in place of the deprecated fillna(method=...) form
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill()
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].bfill()
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].bfill()
      
    #replace (any remaining) missing data with training set mean
    mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(mean)
    mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(mean)
    
    #avoid outlier div by zero when max = min (or when range is NaN)
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
    
    #perform mean normalization to train and test sets using values from train
    mdf_train[suffixcolumn] = (mdf_train[suffixcolumn] - mean) / \
                              (maxminusmin) * multiplier + offset
    
    mdf_test[suffixcolumn] = (mdf_test[suffixcolumn] - mean) / \
                             (maxminusmin) * multiplier + offset
    
    #create list of columns
    nmbrcolumns = [suffixcolumn]

    nmbrnormalization_dict = {suffixcolumn : {'minimum' : minimum, \
                                              'maximum' : maximum, \
                                              'maxminusmin' : maxminusmin, \
                                              'mean' : mean, \
                                              'std' : std, \
                                              'offset' : offset, \
                                              'multiplier': multiplier, \
                                              'cap' : cap, \
                                              'floor' : floor, \
                                              'adjinfill' : adjinfill}}

    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'mean', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def process_binary_class(self, mdf_train, mdf_test, column, category, \
                           postprocess_dict, params = {}):
    '''
    #process_binary_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #converts binary classification values to 0 or 1
    #takes as arguement pandas dataframes (mdf_train, mdf_test), \
    #the name of the column string ('column') \
    #and the category from parent column (category)
    #postprocess_dict is not inspected by this function
    #the encoding is derived from the training set values ranked by descending count
    #(ties broken by ascending value): the first ranked value is encoded as 1
    #and the second as 0, unless 0 and/or 1 are among those two values
    #in which case they keep their own designation
    #when the training set only has a single value a 'zzzinfill' plug
    #takes the place of the absent second value
    #fills missing values with most common value
    #values in either set other than the two encoded values are also replaced
    #with the missing value plug
    #when the training column has no values at all both returned columns are set to 0
    #params entries recognized (each optional):
    #  'inplace'     : True renames the source column instead of copying it (default False)
    #  'adjinfill'   : True to infill from adjacent cells (forward fill then backward fill)
    #                  prior to the plug value infill (default False)
    #  'str_convert' : True casts both columns to str prior to evaluation (default False)
    #returns the dataframes with new column of name column + '_bnry'
    #and a column_dict_list recording the encoding
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    adjinfill = params.get('adjinfill', False)
    str_convert = params.get('str_convert', False)
    
    suffixcolumn = column + '_bnry'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)
    
    if str_convert is True:
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].astype(str)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].astype(str)

    #rank training set values by descending count, ties broken by ascending value
    #the counts column is named explicitly because the default name of a
    #value_counts result differs between pandas versions
    valuecounts = mdf_train[suffixcolumn].value_counts()
    valuecounts = valuecounts.rename_axis('zzzinfill').to_frame(name = suffixcolumn)
    valuecounts = valuecounts.sort_values(by = [suffixcolumn, 'zzzinfill'], ascending = [False, True])
    valuecounts = list(valuecounts.index)
    
    #create list of columns
    bnrycolumns = [suffixcolumn]
    
    if len(valuecounts) > 0:

      #create plug value for missing cells as most common value
      if len(valuecounts) > 1:
        binary_missing_plug = valuecounts[0]
      else:
        #making an executive decision here to deviate from standardinfill of most common value
        #for this edge case where a column evaluated as binary has only single value and NaN's
        binary_missing_plug = 'zzzinfill'

      #test for nan
      if binary_missing_plug != binary_missing_plug:
        binary_missing_plug = valuecounts[1]

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      #(slice is an empty list when there are two or fewer values)
      extravalues = valuecounts[2:]

      #replace nan in valuecounts with binary_missing_plug
      valuecounts = [x if x == x else binary_missing_plug for x in valuecounts]

      #note LabelBinarizer encodes alphabetically, with 1 assigned to first and 0 to second
      #we'll take different approach of going by most common value to 1 unless 0 or 1
      #are already in the set then we'll defer to keeping those designations in place

      #we'll save these in the normalization dictionary for future reference
      onevalue = valuecounts[0]
      if len(valuecounts) > 1:
        zerovalue = valuecounts[1]
      else:
        zerovalue = 'zzzinfill'

      #special case for when the source column is already encoded with 0 and/or 1
      #only the two highest ranked values are candidates for the encoding
      toptwo = valuecounts[:2]

      if 0 in toptwo:
        zerovalue = 0
        if 1 in toptwo:
          onevalue = 1
        elif toptwo[0] == 0:
          #0 ranked first, so the other value (or the plug) takes the 1 designation
          if len(toptwo) > 1:
            onevalue = toptwo[1]
          else:
            onevalue = 'zzzinfill'

      if 1 in toptwo and 0 not in toptwo and toptwo[0] != 1:
        #1 ranked second, so the first ranked value takes the 0 designation
        onevalue = 1
        zerovalue = toptwo[0]

      #edge case that might come up in drift report
      if binary_missing_plug not in [onevalue, zerovalue]:
        binary_missing_plug = onevalue

      #replace any values beyond the top two with the plug value
      for value in extravalues:
        mdf_train[suffixcolumn] = \
        np.where(mdf_train[suffixcolumn] == value, binary_missing_plug, mdf_train[suffixcolumn])
        mdf_test[suffixcolumn] = \
        np.where(mdf_test[suffixcolumn] == value, binary_missing_plug, mdf_test[suffixcolumn])
          
      if adjinfill is True:
        #ffill / bfill methods used in place of the deprecated fillna(method=...) form
        mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill()
        mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill()
        mdf_train[suffixcolumn] = mdf_train[suffixcolumn].bfill()
        mdf_test[suffixcolumn] = mdf_test[suffixcolumn].bfill()

      #replace missing data with specified classification
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(binary_missing_plug)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(binary_missing_plug)

      #this addressess issue where nunique for mdf_test > than that for mdf_train
      #note is currently an opportunity for improvement that NArows won't identify these points as candidates
      #for user specified infill, and as currently addressed will default to infill with the plug value
      #in the mean time a workaround could be for user to manually replace extra values with nan prior to
      #postmunge application such as if they want to apply ML infill
      uniqueintest = mdf_test[suffixcolumn].unique()
      for unique in uniqueintest:
        if unique not in [onevalue, zerovalue]:
          mdf_test[suffixcolumn] = \
          np.where(mdf_test[suffixcolumn] == unique, binary_missing_plug, mdf_test[suffixcolumn])

      #convert column to binary 0/1 classification (replaces scikit LabelBinarizer)
      mdf_train[suffixcolumn] = np.where(mdf_train[suffixcolumn] == onevalue, 1, 0)
      mdf_test[suffixcolumn] = np.where(mdf_test[suffixcolumn] == onevalue, 1, 0)

      #change data types to 8-bit (1 byte) integers for memory savings
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].astype(np.int8)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].astype(np.int8)

      #a few more metrics collected for driftreport
      oneratio = mdf_train[suffixcolumn].sum() / mdf_train[suffixcolumn].shape[0]
      zeroratio = (mdf_train[suffixcolumn].shape[0] - mdf_train[suffixcolumn].sum() )\
                  / mdf_train[suffixcolumn].shape[0]
    
    else:
      #training column had no values to evaluate, return constant columns
      mdf_train[suffixcolumn] = 0
      mdf_test[suffixcolumn] = 0
      
      binary_missing_plug = 0
      onevalue = 1
      zerovalue = 0
      extravalues = 0
      oneratio = 0
      zeroratio = 0
    
    bnrynormalization_dict = {suffixcolumn : {'missing' : binary_missing_plug, \
                                              1 : onevalue, \
                                              0 : zerovalue, \
                                              'extravalues' : extravalues, \
                                              'oneratio' : oneratio, \
                                              'zeroratio' : zeroratio, \
                                              'adjinfill' : adjinfill, \
                                              'str_convert' : str_convert}}

    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []

    for bc in bnrycolumns:

      column_dict = { bc : {'category' : 'bnry', \
                           'origcategory' : category, \
                           'normalization_dict' : bnrynormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : bnrycolumns, \
                           'categorylist' : bnrycolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list

  def process_binary2_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_binary2_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #converts binary classification values to 0 or 1
    #takes as arguement pandas dataframes (mdf_train, mdf_test), \
    #the name of the column string ('column') \
    #and the category from parent column (category)
    #postprocess_dict is not inspected by this function
    #the encoding is derived from the training set values ranked by descending count
    #(ties broken by ascending value): the first ranked value is encoded as 1
    #and the second as 0, unless 0 and/or 1 are among those two values
    #in which case they keep their own designation
    #when the training set only has a single value a 'zzzinfill' plug
    #takes the place of the absent second value
    #fills missing values with the second ranked (less common) value (different than bnry)
    #values in either set other than the two encoded values are also replaced
    #with the missing value plug
    #when the training column has no values at all both returned columns are set to 0
    #params entries recognized (each optional):
    #  'inplace'     : True renames the source column instead of copying it (default False)
    #  'adjinfill'   : True to infill from adjacent cells (forward fill then backward fill)
    #                  prior to the plug value infill (default False)
    #  'str_convert' : True casts both columns to str prior to evaluation (default False)
    #returns the dataframes with new column of name column + '_bnr2'
    #and a column_dict_list recording the encoding
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    adjinfill = params.get('adjinfill', False)
    str_convert = params.get('str_convert', False)
    
    suffixcolumn = column + '_bnr2'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, suffixcolumn, suffixoverlap_results)

      mdf_test[suffixcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, suffixcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : suffixcolumn}, inplace = True)
      mdf_test.rename(columns = {column : suffixcolumn}, inplace = True)
    
    if str_convert is True:
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].astype(str)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].astype(str)

    #rank training set values by descending count, ties broken by ascending value
    #the counts column is named explicitly because the default name of a
    #value_counts result differs between pandas versions
    valuecounts = mdf_train[suffixcolumn].value_counts()
    valuecounts = valuecounts.rename_axis('zzzinfill').to_frame(name = suffixcolumn)
    valuecounts = valuecounts.sort_values(by = [suffixcolumn, 'zzzinfill'], ascending = [False, True])
    valuecounts = list(valuecounts.index)
    
    #create list of columns
    bnrycolumns = [suffixcolumn]
    
    if len(valuecounts) > 0:

      #create plug value for missing cells as the second ranked value
      if len(valuecounts) > 1:
        binary_missing_plug = valuecounts[1]
      else:
        #making an executive decision here to deviate from standardinfill
        #for this edge case where a column evaluated as binary has only single value and NaN's
        binary_missing_plug = 'zzzinfill'

      #test for nan
      if binary_missing_plug != binary_missing_plug:
        binary_missing_plug = valuecounts[0]

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      #(slice is an empty list when there are two or fewer values)
      extravalues = valuecounts[2:]

      #replace nan in valuecounts with binary_missing_plug
      valuecounts = [x if x == x else binary_missing_plug for x in valuecounts]

      #note LabelBinarizer encodes alphabetically, with 1 assigned to first and 0 to second
      #we'll take different approach of going by most common value to 1 unless 0 or 1
      #are already in the set then we'll defer to keeping those designations in place

      #we'll save these in the normalization dictionary for future reference
      onevalue = valuecounts[0]
      if len(valuecounts) > 1:
        zerovalue = valuecounts[1]
      else:
        zerovalue = 'zzzinfill'

      #special case for when the source column is already encoded with 0 and/or 1
      #only the two highest ranked values are candidates for the encoding
      toptwo = valuecounts[:2]

      if 0 in toptwo:
        zerovalue = 0
        if 1 in toptwo:
          onevalue = 1
        elif toptwo[0] == 0:
          #0 ranked first, so the other value (or the plug) takes the 1 designation
          if len(toptwo) > 1:
            onevalue = toptwo[1]
          else:
            onevalue = 'zzzinfill'

      if 1 in toptwo and 0 not in toptwo and toptwo[0] != 1:
        #1 ranked second, so the first ranked value takes the 0 designation
        onevalue = 1
        zerovalue = toptwo[0]

      #edge case that might come up in drift report
      if binary_missing_plug not in [onevalue, zerovalue]:
        binary_missing_plug = zerovalue

      #replace any values beyond the top two with the plug value
      for value in extravalues:
        mdf_train[suffixcolumn] = \
        np.where(mdf_train[suffixcolumn] == value, binary_missing_plug, mdf_train[suffixcolumn])
        mdf_test[suffixcolumn] = \
        np.where(mdf_test[suffixcolumn] == value, binary_missing_plug, mdf_test[suffixcolumn])
          
      if adjinfill is True:
        #ffill / bfill methods used in place of the deprecated fillna(method=...) form
        mdf_train[suffixcolumn] = mdf_train[suffixcolumn].ffill()
        mdf_test[suffixcolumn] = mdf_test[suffixcolumn].ffill()
        mdf_train[suffixcolumn] = mdf_train[suffixcolumn].bfill()
        mdf_test[suffixcolumn] = mdf_test[suffixcolumn].bfill()

      #replace missing data with specified classification
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].fillna(binary_missing_plug)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].fillna(binary_missing_plug)

      #this addressess issue where nunique for mdf_test > than that for mdf_train
      #note is currently an opportunity for improvement that NArows won't identify these points as candidates
      #for user specified infill, and as currently addressed will default to infill with the plug value
      #in the mean time a workaround could be for user to manually replace extra values with nan prior to
      #postmunge application such as if they want to apply ML infill
      uniqueintest = mdf_test[suffixcolumn].unique()
      for unique in uniqueintest:
        if unique not in [onevalue, zerovalue]:
          mdf_test[suffixcolumn] = \
          np.where(mdf_test[suffixcolumn] == unique, binary_missing_plug, mdf_test[suffixcolumn])

      #convert column to binary 0/1 classification (replaces scikit LabelBinarizer)
      mdf_train[suffixcolumn] = np.where(mdf_train[suffixcolumn] == onevalue, 1, 0)
      mdf_test[suffixcolumn] = np.where(mdf_test[suffixcolumn] == onevalue, 1, 0)

      #change data types to 8-bit (1 byte) integers for memory savings
      mdf_train[suffixcolumn] = mdf_train[suffixcolumn].astype(np.int8)
      mdf_test[suffixcolumn] = mdf_test[suffixcolumn].astype(np.int8)

      #a few more metrics collected for driftreport
      oneratio = mdf_train[suffixcolumn].sum() / mdf_train[suffixcolumn].shape[0]
      zeroratio = (mdf_train[suffixcolumn].shape[0] - mdf_train[suffixcolumn].sum() )\
                  / mdf_train[suffixcolumn].shape[0]
    
    else:
      #training column had no values to evaluate, return constant columns
      mdf_train[suffixcolumn] = 0
      mdf_test[suffixcolumn] = 0
      
      binary_missing_plug = 0
      onevalue = 1
      zerovalue = 0
      extravalues = 0
      oneratio = 0
      zeroratio = 0
    
    bnrynormalization_dict = {suffixcolumn : {'missing' : binary_missing_plug, \
                                              1 : onevalue, \
                                              0 : zerovalue, \
                                              'extravalues' : extravalues, \
                                              'oneratio' : oneratio, \
                                              'zeroratio' : zeroratio, \
                                              'adjinfill' : adjinfill, \
                                              'str_convert' : str_convert}}

    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []

    for bc in bnrycolumns:

      column_dict = { bc : {'category' : 'bnr2', \
                           'origcategory' : category, \
                           'normalization_dict' : bnrynormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : bnrycolumns, \
                           'categorylist' : bnrycolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def process_onht_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_onht_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #preprocess column with one hot encoding
    #same as 'text' transform except labels returned column with integer instead of entry appender
    '''
    
    suffixoverlap_results = {}
    
    #adjinfill accepts True/False to change default infill from mean inputation to adjacent cell
    if 'adjinfill' in params:
      adjinfill = params['adjinfill']
    else:
      adjinfill = False
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    tempcolumn = column + '_onht_'
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, tempcolumn, suffixoverlap_results)
    
    #store original column for later retrieval
    mdf_train[tempcolumn] = mdf_train[column].copy()
    mdf_test[tempcolumn] = mdf_test[column].copy()

    #convert column to category
    mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('category')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[tempcolumn].cat.categories:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[tempcolumn].cat.categories:
      mdf_test[tempcolumn] = mdf_test[tempcolumn].cat.add_categories(['zzzinfill'])
      
    if adjinfill is True:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna(method='ffill')
      mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna(method='ffill')
      mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna(method='bfill')
      mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna(method='bfill')

    #replace NA with a dummy variable
    mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna('zzzinfill')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna('zzzinfill')

    if str_convert is True:
      #replace numerical with string equivalent
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype(str)
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype(str)
    else:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('object')
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('object')

    #extract categories for column labels
    #note that .unique() extracts the labels as a numpy array
    labels_train = mdf_train[tempcolumn].unique()
#     labels_train.sort(axis=0)
    labels_train = sorted(labels_train, key=str)
    labels_train = list(labels_train)
    orig_labels_train = list(labels_train.copy())
    labels_test = mdf_test[tempcolumn].unique()
#     labels_test.sort(axis=0)
    labels_test = sorted(labels_test, key=str)
    labels_test = list(labels_test)

    #pandas one hot encoding doesn't sort integers and strings properly so using my own
    df_train_cat = pd.DataFrame(mdf_train[tempcolumn])
    df_test_cat = pd.DataFrame(mdf_test[tempcolumn])
    for entry in labels_train:
      df_train_cat[entry] = np.where(mdf_train[tempcolumn] == entry, 1, 0)
      df_test_cat[entry] = np.where(mdf_test[tempcolumn] == entry, 1, 0)
    del df_train_cat[tempcolumn]
    del df_test_cat[tempcolumn]
    
    labels_dict = {}
    i = 0
    for entry in labels_train:
      labels_dict.update({entry : column + '_onht_' + str(i)})
      i += 1
    
    #convert sparse array to pandas dataframe with column labels
    df_train_cat.columns = labels_train
    df_test_cat.columns = labels_train

    #Get missing columns in test set that are present in training set
    missing_cols = set( df_train_cat.columns ) - set( labels_test )
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)

    #concatinate the sparse set with the rest of our training data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)

    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    #delete _NArw column, this will be processed seperately in the processfamily function
    #delete support NArw2 column
#     columnNArw = column + '_NArw'
    columnNAr2 = column + '_zzzinfill'
    if columnNAr2 in mdf_train.columns:
      del mdf_train[columnNAr2]
    if columnNAr2 in mdf_test.columns:
      del mdf_test[columnNAr2]
    if 'zzzinfill' in orig_labels_train:
      orig_labels_train.remove('zzzinfill')

#     del mdf_train[column + '_NAr2']    
#     del mdf_test[column + '_NAr2']
    
    #create output of a list of the created column names
    NAcolumn = columnNAr2
    labels_train = list(df_train_cat)
    if NAcolumn in labels_train:
      labels_train.remove(NAcolumn)
    textcolumns = labels_train
    
    #now we'll creaate a dicitonary of the columns : categories for later reference
    #reminder here is list of. unque values from original column
    #labels_train
    
    normalizationdictvalues = labels_train
    normalizationdictkeys = textcolumns
    
    normalizationdictkeys = sorted(normalizationdictkeys, key=str)
    normalizationdictvalues = sorted(normalizationdictvalues, key=str)
    
    #textlabelsdict = dict(zip(normalizationdictkeys, normalizationdictvalues))
    textlabelsdict = dict(zip(normalizationdictvalues, orig_labels_train))
    
    #change data types to 8-bit (1 byte) integers for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)

    #store some values in the text_dict{} for use later in ML infill methods
    column_dict_list = []

    categorylist = textcolumns.copy()
#     categorylist.remove(columnNArw)

    #now convert coloumn headers from text convention to onht convention
    mdf_train = mdf_train.rename(columns=labels_dict)
    mdf_test  = mdf_test.rename(columns=labels_dict)
    
    textcolumns = [labels_dict[entry] for entry in textcolumns]
    
    inverse_labels_dict = {value:key for key,value in labels_dict.items()}

    for tc in textcolumns:
    
      #new parameter collected for driftreport
      tc_ratio = tc + '_ratio'
      tcratio = mdf_train[tc].sum() / mdf_train[tc].shape[0]

      textnormalization_dict = {tc : {'textlabelsdict_onht' : textlabelsdict, \
                                      tc_ratio : tcratio, \
                                      'labels_dict' : labels_dict, \
                                      'inverse_labels_dict' : inverse_labels_dict, \
                                      'text_categorylist' : categorylist, \
                                      'adjinfill' : adjinfill, \
                                      'str_convert' : str_convert}}
      
      column_dict = {tc : {'category' : 'onht', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : textcolumns, \
                           'categorylist' : textcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_text_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_text_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #one-hot encodes a column of categorical entries
    #takes as arguments two pandas dataframes containing training and test data respectively 
    #(mdf_train, mdf_test), the name of the source column (column),
    #and the name of the category from parent column (category)
    #postprocess_dict is part of the signature but is not read in this function
    #
    #params may contain 'adjinfill' (default False), when True missing entries are 
    #filled from adjacent cells (forward fill then backward fill)
    #any entries still missing afterwards receive the plug value 'zzzinfill'
    #
    #note this processes both training and test data together so that the test set
    #receives exactly the columns derived from the training set, in the same order:
    #- categories present in test but missing from train are dropped from the test encoding
    #- categories present in train but missing from test are given a column of zeros in test
    #
    #doesn't delete the original column from master dataframe
    #new columns are named column + '_' + category entry and are cast to int8
    #the column + '_zzzinfill' column is deleted from both returned sets
    #
    #returns the two transformed dataframes (mdf_train, mdf_test)
    #and column_dict_list, a list with one column_dict per new column
    
    #if only have training but not test data handy, use same training data for both dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    #adjinfill accepts True/False to apply adjacent cell infill prior to the plug value
    if 'adjinfill' in params:
      adjinfill = params['adjinfill']
    else:
      adjinfill = False
    
    #the temporary working column is named after the first unique entry of the train column
    tempsuffix = str(mdf_train[column].unique()[0])
    
    tempcolumn = column + '_' + tempsuffix
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, tempcolumn, suffixoverlap_results)
    
    #transformations are applied to a copy so the source column is retained
    mdf_train[tempcolumn] = mdf_train[column].copy()
    mdf_test[tempcolumn] = mdf_test[column].copy()

    #convert column to category
    mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('category')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('category')

    #the plug value has to be a registered category before it can be used as a fill value
    if 'zzzinfill' not in mdf_train[tempcolumn].cat.categories:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[tempcolumn].cat.categories:
      mdf_test[tempcolumn] = mdf_test[tempcolumn].cat.add_categories(['zzzinfill'])
    
    if adjinfill is True:
      #ffill / bfill are used in place of fillna(method=...) which pandas has deprecated
      mdf_train[tempcolumn] = mdf_train[tempcolumn].ffill()
      mdf_test[tempcolumn] = mdf_test[tempcolumn].ffill()
      mdf_train[tempcolumn] = mdf_train[tempcolumn].bfill()
      mdf_test[tempcolumn] = mdf_test[tempcolumn].bfill()
      
    #replace any remaining NA with the plug value
    mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna('zzzinfill')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna('zzzinfill')

    #replace numerical with string equivalent
    mdf_train[tempcolumn] = mdf_train[tempcolumn].astype(str)
    mdf_test[tempcolumn] = mdf_test[tempcolumn].astype(str)

    #extract the sorted unique entries, these become the column labels
    labels_train = sorted(mdf_train[tempcolumn].unique())
    orig_labels_train = list(labels_train)
    labels_test = sorted(mdf_test[tempcolumn].unique())

    #pandas one hot encoder
    df_train_cat = pd.get_dummies(mdf_train[tempcolumn])
    df_test_cat = pd.get_dummies(mdf_test[tempcolumn])

    #append column header name to each category listing
    labels_train = [column + '_' + entry for entry in labels_train]
    labels_test = [column + '_' + entry for entry in labels_test]
    
    #apply the prefixed labels as column headers of the encodings
    df_train_cat.columns = labels_train
    df_test_cat.columns = labels_test

    #Get missing columns in test set that are present in training set
    missing_cols = set( df_train_cat.columns ) - set( df_test_cat.columns )

    #Add a missing column in test set with default value equal to 0
    for c in missing_cols:
      df_test_cat[c] = 0
    #Ensure the order of column in the test set is in the same order than in train set
    #Note this also removes categories in test set that aren't present in training set
    df_test_cat = df_test_cat[df_train_cat.columns]

    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)
    
    #concatenate the encodings with the rest of our data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)
    
    #the column marking plug value entries is not retained as an output column
    columnNAr2 = column + '_zzzinfill'
    if columnNAr2 in mdf_train.columns:
      del mdf_train[columnNAr2]
    if columnNAr2 in mdf_test.columns:
      del mdf_test[columnNAr2]
    if 'zzzinfill' in orig_labels_train:
      orig_labels_train.remove('zzzinfill')
    
    #create output of a sorted list of the created column names
    textcolumns = list(df_train_cat)
    if columnNAr2 in textcolumns:
      textcolumns.remove(columnNAr2)
    textcolumns.sort()
    
    #dictionary of {new column header : original category entry} for later reference
    textlabelsdict = dict(zip(textcolumns, orig_labels_train))
    
    #change data types to 8-bit (1 byte) integers for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for tc in textcolumns:
    
      #ratio of activations in the train column, collected for driftreport
      tc_ratio = tc + '_ratio'
      tcratio = mdf_train[tc].sum() / mdf_train[tc].shape[0]

      textnormalization_dict = {tc : {'textlabelsdict_text' : textlabelsdict, \
                                      tc_ratio : tcratio, \
                                      'adjinfill' : adjinfill}}
      
      column_dict = {tc : {'category' : 'text', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : textcolumns, \
                           'categorylist' : textcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_lngt_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #replaces each entry with the character count of its string representation
    #such as a heuristic for information content
    #missing entries are not treated specially, so default infill is len(str(np.nan)) = 3
    #params may contain 'inplace' (default False), when True the source column is
    #renamed to column + '_lngt' instead of being copied
    #returns the dataframe and a column_dict_list with one entry for column + '_lngt'
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    lngt_column = column + '_lngt'
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #reuse the source column under the new header
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, lngt_column, suffixoverlap_results)
      
      df.rename(columns = {column : lngt_column}, inplace = True)
    
    else:
      
      #derive the new column from a copy of the source column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, lngt_column, suffixoverlap_results)
    
    #string length of each entry
    df[lngt_column] = df[lngt_column].astype(str).apply(len)
    
    #summary statistics of the resulting lengths, collected as driftreport metrics
    lengths = df[lngt_column]
    
    normalization_dict = {lngt_column : {'maximum' : lengths.max(), \
                                         'minimum' : lengths.min(), \
                                         'mean' : lengths.mean(), \
                                         'std' : lengths.std() }}

    #list of columns returned from this transform
    lngt_columns = [lngt_column]

    #one column_dict per returned column
    column_dict_list = [{nc : {'category' : 'lngt', \
                               'origcategory' : category, \
                               'normalization_dict' : normalization_dict, \
                               'origcolumn' : column, \
                               'inputcolumn' : column, \
                               'columnslist' : lngt_columns, \
                               'categorylist' : lngt_columns, \
                               'infillmodel' : False, \
                               'infillcomplete' : False, \
                               'suffixoverlap_results' : suffixoverlap_results, \
                               'deletecolumn' : False}} \
                        for nc in lngt_columns]

    return df, column_dict_list
  
  def process_UPCS_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #converts the entries of a column to uppercase strings
    #such as to allow consistent encoding if data has upper/lower case discrepancies
    #entries that don't equal themselves (i.e. NaN) are left as is rather than converted
    #params may contain:
    #'inplace' (default False), when True the source column is renamed 
    #to column + '_UPCS' instead of being copied
    #'activate' (default True), when not True the conversion is skipped 
    #and the new column carries the source entries unchanged
    #returns same dataframe with new column of name column + '_UPCS'
    #and a column_dict_list with one entry for that column
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    upcs_column = column + '_UPCS'
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    activate = params.get('activate', True)
    
    if inplace is True:
      
      #reuse the source column under the new header
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, upcs_column, suffixoverlap_results)
      
      df.rename(columns = {column : upcs_column}, inplace = True)
    
    else:
      
      #derive the new column from a copy of the source column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, upcs_column, suffixoverlap_results)
    
    if activate is True:
      
      #self-equality is False for NaN, so the mask selects the entries to convert
      selfequal = df[upcs_column] == df[upcs_column]
      df[upcs_column] = \
      np.where(selfequal, df[upcs_column].astype(str), df[upcs_column])
      
      #mask re-derived from the stringified column before applying uppercase
      selfequal = df[upcs_column] == df[upcs_column]
      df[upcs_column] = \
      np.where(selfequal, df[upcs_column].str.upper(), df[upcs_column])

    #list of columns returned from this transform
    UPCScolumns = [upcs_column]

    #the only recorded property is whether the conversion was applied
    normalization_dict = {upcs_column : {'activate' : activate}}

    #one column_dict per returned column
    column_dict_list = [{nc : {'category' : 'UPCS', \
                               'origcategory' : category, \
                               'normalization_dict' : normalization_dict, \
                               'origcolumn' : column, \
                               'inputcolumn' : column, \
                               'columnslist' : UPCScolumns, \
                               'categorylist' : UPCScolumns, \
                               'infillmodel' : False, \
                               'infillcomplete' : False, \
                               'suffixoverlap_results' : suffixoverlap_results, \
                               'deletecolumn' : False}} \
                        for nc in UPCScolumns]

    return df, column_dict_list

  def process_splt_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_splt_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #identifies substrings shared between unique entries of the train set
    #and records each such overlap as a new boolean (int8) column
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a new column would be created identifying cells which included 
    #'north' in their entries
    #(here for north and northeast)
    #returns as column titled origcolumn_splt_entry    
    #missing values are ignored by default
    #
    #params may contain:
    #'minsplit' minimum character length of a recorded overlap (default 5)
    #'space_and_punctuation' (default True), when False the excluded_characters are 
    #scrubbed from the comparison substrings taken from the other entries
    #'excluded_characters' list of characters scrubbed in that case
    #'concurrent_activations' (default False), when False the search for a given 
    #pair of entries stops at the first recorded match
    #'int_headers' (default False), when True the new columns are renamed to 
    #column + suffix + '_' + integer
    #'suffix' (default '_splt') inserted between the column name and the overlap
    #'test_same_as_train' (default False), when True the overlaps found for the 
    #train set are reused for the test set without inspecting test entries
    #
    #postprocess_dict is part of the signature but is not read in this function
    #
    #returns the two dataframes (mdf_train, mdf_test) and column_dict_list,
    #a list with one column_dict per new column, which is empty if no overlaps were found
    #(including the case of a train column without any entries)
    '''
    
    suffixoverlap_results = {}
    
    #minsplit is kept as one less than the minimum overlap length 
    #since it serves as the exclusive stop of the range of overlap lengths below
    if 'minsplit' in params:
      minsplit = params['minsplit'] - 1
    else:
      minsplit = 4
      
    if 'space_and_punctuation' in params:
      space_and_punctuation = params['space_and_punctuation']
    else:
      space_and_punctuation = True
      
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = [' ', ',', '.', '?', '!', '(', ')']
      
    if 'concurrent_activations' in params:
      concurrent_activations = params['concurrent_activations']
    else:
      concurrent_activations = False
      
    if 'int_headers' in params:
      int_headers = params['int_headers']
    else:
      int_headers = False
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_splt'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(map(str, mdf_train[column].unique()))
    
    #default of 0 covers a train column without entries, 
    #in which case no overlap lengths are inspected
    maxlength = max((len(x) for x in unique_list), default=0)
    
    #longest candidate lengths are inspected first
    overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:

        len_unique = len(unique)

        if len_unique >= overlap_length:

          nbr_iterations = len_unique - overlap_length + 1

          for i in range(nbr_iterations):

            #candidate substring of the current entry
            extract = unique[i:(overlap_length+i)]

            #check whether the candidate is already covered by a recorded overlap
            extract_already_in_overlap_dict = False

            for key in overlap_dict:

              len_key = len(key)

              if len_key >= overlap_length:

                nbr_iterations3 = len_key - overlap_length + 1

                for k in range(nbr_iterations3):

                  extract3 = key[k:(overlap_length+k)]
                  
                  if concurrent_activations is False:

                    if extract == extract3:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                  elif concurrent_activations is True:
                    
                    #with concurrent activations a candidate is only treated as covered 
                    #if the current entry is already associated with that key
                    if extract == extract3 and unique in overlap_dict[key]:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                if extract_already_in_overlap_dict is True:
                  
                  break

            if extract_already_in_overlap_dict is False:

              #compare the candidate against substrings of every other entry
              for unique2 in unique_list:

                if unique2 != unique:

                  len_unique2 = len(unique2)

                  nbr_iterations2 = len_unique2 - overlap_length + 1

                  for j in range(nbr_iterations2):

                    extract2 = unique2[j:(overlap_length+j)]
                    
                    if space_and_punctuation is False:
                      
                      #if any punctuation was scrubbed these two extracts will be different lengths
                      for scrub_punctuation in excluded_characters:
                        
                        extract2 = extract2.replace(scrub_punctuation, '')
                        
                    elif space_and_punctuation is not True:
                      
                      #comparisons are only performed for boolean True / False settings
                      continue
                      
                    if extract2 != extract:
                      
                      continue

                    if extract in overlap_dict:

                      if unique2 not in overlap_dict[extract]:

                        overlap_dict[extract].append(unique2)
                        
                        if concurrent_activations is False:

                          break

                      if unique not in overlap_dict[extract]:

                        overlap_dict[extract].append(unique)
                        
                        if concurrent_activations is False:

                          break

                    #else if we don't have a key for extract
                    else:

                      overlap_dict.update({extract : [unique, unique2]})
                      
                      if concurrent_activations is False:

                        break
     
    #now for mdf_test we'll only consider those overlaps already identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:

      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      #associate each test entry with the train overlaps found within it
      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                if concurrent_activations is False:

                  break
    
    newcolumns = []

    #one new column per recorded overlap, activated for rows whose entry 
    #is among the entries associated with that overlap
    for dict_key in overlap_dict:

      newcolumn = column + suffix + '_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    #headers prior to any integer renaming are recorded in the normalization dictionary
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = {}
      i = 0
      for entry in newcolumns:
        int_labels_dict.update({entry : column + suffix + '_' + str(i)})
        i += 1
        
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
        
      #now convert column headers from string to int convention
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      #inverse mapping values have the leading column name and one further character stripped
      inverse_int_labels_dict = {value:key for key,value in int_labels_dict.items()}
      for key in inverse_int_labels_dict:
        inverse_int_labels_dict[key] = inverse_int_labels_dict[key][len(column) + 1:]
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    #remains empty when no overlaps were found
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'overlap_dict' : overlap_dict, \
                                      'splt_newcolumns_splt'   : newcolumns, \
                                      'minsplit' : minsplit, \
                                      'concurrent_activations' : concurrent_activations, \
                                      'preint_newcolumns' : preint_newcolumns, \
                                      'int_headers' : int_headers, \
                                      'int_labels_dict' : int_labels_dict, \
                                      'inverse_int_labels_dict' : inverse_int_labels_dict}}
      
      column_dict = {tc : {'category' : 'splt', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_spl2_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_spl2_class(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies overlaps of subsets of those strings and replaces entries 
    #with their redecued overlap
    #replaces entries without overlap to 0 (unique to spl5)
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a new column would be created in which the entry 'north' 
    #replaced cells with north in their entries
    #(here for north and northeast)
    #and cells with west would be set to 0
    #returns as column titled origcolumn_spl2
    #missing values are ignored by default
    #this alternative to splt may be benficial for instance if one wanted 
    #to follow with an ordl encoding
    '''
    
    suffixoverlap_results = {}
    
#     overlap_lengths = [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7 , 6, 5]

    if 'minsplit' in params:
        
      minsplit = params['minsplit'] - 1
    
    else:
      
      minsplit = 4
      
    if 'space_and_punctuation' in params:
      space_and_punctuation = params['space_and_punctuation']
    else:
      space_and_punctuation = True
      
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = [' ', ',', '.', '?', '!', '(', ')']
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_spl2'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
      
    if 'consolidate_nonoverlaps' in params:
      consolidate_nonoverlaps = params['consolidate_nonoverlaps']
    else:
      consolidate_nonoverlaps = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:

        len_unique = len(unique)

        if len_unique >= overlap_length:

          nbr_iterations = len_unique - overlap_length + 1

          for i in range(nbr_iterations):

            extract = unique[i:(overlap_length+i)]

            extract_already_in_overlap_dict = False

            for key in overlap_dict:

              len_key = len(key)

              if len_key >= overlap_length:

                nbr_iterations3 = len_key - overlap_length + 1

                for k in range(nbr_iterations3):

                  extract3 = key[k:(overlap_length+k)]

                  if extract == extract3:

                    extract_already_in_overlap_dict = True
                    
                    break
                    
                if extract_already_in_overlap_dict is True:
                  
                  break

            if extract_already_in_overlap_dict is False:

              for unique2 in unique_list:

                if unique2 != unique:

                  len_unique2 = len(unique2)

                  nbr_iterations2 = len_unique2 - overlap_length + 1

                  for j in range(nbr_iterations2):

                    extract2 = unique2[j:(overlap_length+j)]

                    #________
                    
                    if space_and_punctuation is True:

                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          break
                          
                    elif space_and_punctuation is False:
                      
                      for scrub_punctuation in excluded_characters:
                        
                        extract2 = extract2.replace(scrub_punctuation, '')
                        
                      #if any punctuation was scrubbed these two extracts will be different lengths
                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          break
      
    #now for mdf_test we'll only consider those overlaps already 
    #identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:
    
      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                break
    
    #so that was all comparable to splt, now for spl2 we'll create a new 
    #dictionary structred as
    #{original unique value : overlap extract for replacement}
    
    #since one original unique value may have entries as multiple overlaps, 
    #we'll prioritize overlaps with
    #longer string lengths and then alphabetical
    
    spl2_overlap_dict = {}
    
    overlap_key_list = list(overlap_dict)
    
    overlap_key_list.sort()
    overlap_key_list.sort(key = len, reverse=True)
    
    for overlap_key in overlap_key_list:
      
      for entry in overlap_dict[overlap_key]:
        
        if entry not in spl2_overlap_dict:
          
          spl2_overlap_dict.update({entry : overlap_key})
    
    #here's where we identify values to set to 0 for spl5
    spl5_zero_dict = {}
    if consolidate_nonoverlaps is True:
      for entry in unique_list:
        if entry not in spl2_overlap_dict:
          spl5_zero_dict.update({entry : 0})
    
    #then we'll do same for test set
    
    spl2_test_overlap_dict = {}
    
    test_overlap_key_list = list(test_overlap_dict)
    
    test_overlap_key_list.sort()
    test_overlap_key_list.sort(key = len, reverse=True)
    
    for overlap_key in test_overlap_key_list:
      
      for entry in test_overlap_dict[overlap_key]:
        
        if entry not in spl2_test_overlap_dict:
          
          spl2_test_overlap_dict.update({entry : overlap_key})
    
    #here's where we identify values to set to 0 for spl5
    spl5_test_zero_dict = {}
    if consolidate_nonoverlaps is True:

      if test_same_as_train is True:
        unique_list_test = list(mdf_test[column].unique())
        unique_list_test = list(map(str, unique_list_test))

      for entry in unique_list_test:
        if entry not in spl2_test_overlap_dict:
          spl5_test_zero_dict.update({entry : 0})
    
    newcolumns = []

#     for dict_key in overlap_dict:

    newcolumn = column + suffix
    
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
    
    mdf_test[newcolumn] = mdf_test[column].copy()
    
    mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
    mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

    mdf_train[newcolumn] = mdf_train[newcolumn].replace(spl2_overlap_dict)
    mdf_train[newcolumn] = mdf_train[newcolumn].replace(spl5_zero_dict)

#       mdf_train[newcolumn] = mdf_train[column].isin(overlap_dict[dict_key])
#       mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)

    mdf_test[newcolumn] = mdf_test[newcolumn].replace(spl2_test_overlap_dict)
    mdf_test[newcolumn] = mdf_test[newcolumn].replace(spl5_test_zero_dict)

#       mdf_test[newcolumn] = mdf_test[column].isin(test_overlap_dict[dict_key])
#       mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

    newcolumns.append(newcolumn)
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'consolidate_nonoverlaps' : consolidate_nonoverlaps, \
                                      'overlap_dict' : overlap_dict, \
                                      'spl2_newcolumns'   : newcolumns, 
                                      'spl2_overlap_dict' : spl2_overlap_dict, \
                                      'spl2_test_overlap_dict' : spl2_test_overlap_dict, \
                                      'spl5_zero_dict' : spl5_zero_dict, \
                                      'minsplit' : minsplit}}
      
      column_dict = {tc : {'category' : 'spl2', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []

    return mdf_train, mdf_test, column_dict_list

  def process_sp19_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sp19_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #entries are cast to str, then character overlaps shared between
    #different unique entries of the train set are identified,
    #searching from longer to shorter overlap lengths
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then the overlap 'north' would be identified (here for north and northeast)
    
    #sp15 is comparable to splt but multiple concurrent activations allowed
    #sp19 is comparable to sp15 but with a returned binary encoding aggregation
    #each overlap is first recorded in a temporary boolean activation column
    #titled column + '_sp15_' + overlap (this '_sp15_' is independent of suffix param)
    #the activations of each row are then aggregated into a single string
    #which is replaced with a binary encoding, returned in columns titled
    #column + suffix + '_' + str(i)
    #the temporary columns are deleted prior to return
    #activation sets found in test set but not in train set are encoded
    #with the encoding of the reserved entry 'zzzinfill'
    
    #accepts params:
    #minsplit: shortest overlap length recorded, defaults to 5
    #space_and_punctuation: defaults to True, when False candidate extracts have
    #  excluded_characters scrubbed prior to comparison, such that extracts
    #  containing those characters are not matched
    #excluded_characters: list of characters scrubbed when space_and_punctuation is False
    #int_headers: defaults to False, when True the temporary activation columns 
    #  are renamed to an integer suffix convention
    #concurrent_activations: defaults to True, when False an extract is skipped if it is
    #  a subset of any overlap already recorded, when True it is only skipped
    #  if the current entry is already listed under such a recorded overlap
    #suffix: appended to column for returned headers, defaults to '_sp19'
    #test_same_as_train: defaults to False, when True the overlap entries from 
    #  train set are reused for test set instead of parsing the test set entries
    
    #returns mdf_train, mdf_test, column_dict_list
    #column_dict_list is returned empty when no overlaps were identified
    '''
    
    suffixoverlap_results = {}
    
    #overlap lengths are generated below with an exclusive range stop, so 1 is subtracted
    #for the passed minsplit to be the shortest overlap length considered
    #note that it is this decremented value that is recorded in the normalization_dict
    minsplit = params.get('minsplit', 5) - 1
    
    space_and_punctuation = params.get('space_and_punctuation', True)
    
    excluded_characters = \
    params.get('excluded_characters', [' ', ',', '.', '?', '!', '(', ')'])
    
    int_headers = params.get('int_headers', False)
    
    concurrent_activations = params.get('concurrent_activations', True)
    
    suffix = params.get('suffix', '_sp19')
    
    test_same_as_train = params.get('test_same_as_train', False)
    
    #first we find overlaps from mdf_train
    
    unique_list = list(map(str, mdf_train[column].unique()))
    
    #default=0 so that a train column without entries results in no overlaps
    #instead of a ValueError from max() applied to an empty sequence
    maxlength = max((len(x) for x in unique_list), default=0)
    
    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}
    overlap_dict = {}
    
    #we'll cycle through the overlap lengths from longest to shortest
    for overlap_length in range(maxlength - 1, minsplit, -1):
      
      for unique in unique_list:
        
        #this range is empty when unique is shorter than overlap_length
        for i in range(len(unique) - overlap_length + 1):
          
          extract = unique[i:(overlap_length+i)]
          
          #check whether this extract is a subset of an overlap already recorded
          #(extract has length overlap_length, so this membership test is equivalent 
          #to comparing against each window of that length within the key)
          if concurrent_activations is False:
            
            already_recorded = \
            any(extract in key for key in overlap_dict)
            
          elif concurrent_activations is True:
            
            already_recorded = \
            any(extract in key and unique in overlap_dict[key] for key in overlap_dict)
            
          else:
            
            #values other than True / False never register as already recorded
            already_recorded = False
            
          if already_recorded:
            
            continue
            
          #now search for this extract within the other unique entries
          for unique2 in unique_list:
            
            if unique2 == unique:
              
              continue
              
            for j in range(len(unique2) - overlap_length + 1):
              
              extract2 = unique2[j:(overlap_length+j)]
              
              if space_and_punctuation is False:
                
                #if any punctuation was scrubbed these two extracts will be different lengths
                for scrub_punctuation in excluded_characters:
                  
                  extract2 = extract2.replace(scrub_punctuation, '')
                  
              elif space_and_punctuation is not True:
                
                #no comparisons are performed for values other than True / False
                break
                
              if extract2 != extract:
                
                continue
                
              #stop_scan signals that the scan of unique2 ends after this match,
              #which only applies when concurrent_activations is False
              stop_scan = False
              
              if extract in overlap_dict:
                
                members = overlap_dict[extract]
                
                if unique2 not in members:
                  
                  members.append(unique2)
                  
                  stop_scan = concurrent_activations is False
                  
                if not stop_scan and unique not in members:
                  
                  members.append(unique)
                  
                  stop_scan = concurrent_activations is False
                  
              #else if we don't have a key for extract
              else:
                
                overlap_dict.update({extract : [unique, unique2]})
                
                stop_scan = concurrent_activations is False
                
              if stop_scan:
                
                break
        
    #now for mdf_test we'll only consider those overlaps already identified from train set
    #note that test_overlap_dict is only defined for test_same_as_train of True or False
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:
      
      unique_list_test = list(map(str, mdf_test[column].unique()))
      
      #each test entry is listed once under every train overlap it contains
      test_overlap_dict = {}
      
      for dict_key in overlap_dict:
        
        test_overlap_dict[dict_key] = \
        [unique_test for unique_test in unique_list_test if dict_key in unique_test]
    
    #populate a temporary boolean activation column for each overlap
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + '_sp15_' + dict_key
      
      #df_copy_train is defined elsewhere in this class, as used here it returns
      #the train dataframe with newcolumn populated and the updated suffixoverlap_results
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = \
      {entry : column + '_sp15_' + str(i) for i, entry in enumerate(newcolumns)}
        
      #now convert column headers from string to int convention
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      newcolumns = [int_labels_dict[entry] for entry in newcolumns]

      #maps the int header back to the string header with the column name 
      #and the following underscore stripped
      inverse_int_labels_dict = \
      {value : key[len(column) + 1:] for key, value in int_labels_dict.items()}
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []
    
    #begin binary encoding of set, leaving the int_headers here out of convenience, not really needed
    
    if len(newcolumns) > 0:
      
      sp19_column = column + suffix
    
      #aggregate collection of activations as string set
      #the prefix 'activations_' is to avoid potential of overlap with binary encoding and aggregated activations
      mdf_train[sp19_column] = 'activations_'
      mdf_test[sp19_column] = 'activations_'

      for entry in newcolumns:
        mdf_train[sp19_column] = mdf_train[sp19_column] + mdf_train[entry].astype(str)
        mdf_test[sp19_column] = mdf_test[sp19_column] + mdf_test[entry].astype(str)

      #extract categories for column labels
      labels_train = sorted(mdf_train[sp19_column].unique())
      labels_test = sorted(mdf_test[sp19_column].unique())

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
        labels_train.sort()
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
        labels_test.sort()

      #get length of the list
      listlength = len(labels_train)

      #calculate number of columns we'll need
      binary_column_count = int(np.ceil(np.log2(listlength)))

      #store the encodings as {label : encoding}
      #format converts the position i to a binary encoding returned as a string
      #where 0 pads out the encoding with 0's to a length of binary_column_count
      #and b designates conversion to binary
      binary_encoding_dict = \
      {label : format(i, f"0{binary_column_count}b") for i, label in enumerate(labels_train)}

      #new driftreport metric _1010_activations_dict
      #records the ratio of train set rows found for each encoded label
      _1010_activations_dict = {}
      for key in binary_encoding_dict:
        sumcalc = (mdf_train[sp19_column] == key).sum() 
        ratio = sumcalc / mdf_train[sp19_column].shape[0]
        _1010_activations_dict.update({key:ratio})

      #replace the categories in train set with their encodings
      mdf_train[sp19_column] = mdf_train[sp19_column].replace(binary_encoding_dict) 

      #in test set, we'll need to strike any categories that weren't present in train
      #first let's identify what applies
      testspecificcategories = list(set(labels_test)-set(labels_train))

      #so we'll just replace those items with our plug value
      testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
      mdf_test[sp19_column] = mdf_test[sp19_column].replace(testplug_dict)  

      #now we'll apply the 1010 transformation to the test set
      mdf_test[sp19_column] = mdf_test[sp19_column].replace(binary_encoding_dict)    

      #ok let's create a list of columns to store each entry of the binary encoding
      _1010_columnlist = \
      [column + suffix + '_' + str(i) for i in range(binary_column_count)]

      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)

      #now let's store the encoding, one character of the encoding string per column
      for i, _1010_column in enumerate(_1010_columnlist):

        mdf_train[_1010_column] = mdf_train[sp19_column].str.slice(i,i+1).astype(np.int8)

        mdf_test[_1010_column] = mdf_test[sp19_column].str.slice(i,i+1).astype(np.int8)

      #now delete the support column
      del mdf_train[sp19_column]
      del mdf_test[sp19_column]

      #and the temporary activation columns
      for entry in newcolumns:
        del mdf_train[entry]
        del mdf_test[entry]

      #now store the column_dict entries
      categorylist = _1010_columnlist

      for tc in categorylist:

        normalization_dict = {tc : {'suffix' : suffix, \
                                    'test_same_as_train' : test_same_as_train, \
                                    '_1010_binary_encoding_dict' : binary_encoding_dict, \
                                    '_1010_binary_column_count' : binary_column_count, \
                                    '_1010_activations_dict' : _1010_activations_dict, \
                                    'categorylist' : categorylist, \
                                    'overlap_dict' : overlap_dict, \
                                    'splt_newcolumns_sp19'   : newcolumns, \
                                    'minsplit' : minsplit, \
                                    'concurrent_activations' : concurrent_activations, \
                                    'preint_newcolumns' : preint_newcolumns, \
                                    'int_headers' : int_headers, \
                                    'int_labels_dict' : int_labels_dict, \
                                    'inverse_int_labels_dict' : inverse_int_labels_dict}}

        column_dict = {tc : {'category' : 'sp19', \
                             'origcategory' : category, \
                             'normalization_dict' : normalization_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_sbst_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbst_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #identifies cases where a full unique value is present 
    #as a subset of a longer length unique value
    #and returns one-hot activations to aggregate those cases
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a column titled column + suffix + '_north' would be returned
    #with activations for entries of north and northeast
    
    #this differs from other string parsing functions in that
    #only complete entries are checked for presence as subsets in other entries
    
    #accepts params:
    #concurrent_activations: defaults to True, recorded in the normalization_dict
    #  NOTE(review): the original scan only used this to stop scanning an entry early,
    #  it did not restrict an entry to a single activation, so the returned
    #  activations are the same for True and False - confirm that is intended
    #int_headers: defaults to False, when True returned headers are converted 
    #  to an integer suffix convention (for privacy preserving headers)
    #minsplit: shortest length of a unique value that is searched for, defaults to 1
    #suffix: appended to column for returned headers, defaults to '_sbst'
    #test_same_as_train: defaults to False, when True the overlap entries from 
    #  train set are reused for test set instead of parsing the test set entries
    
    #returns mdf_train, mdf_test, column_dict_list
    #column_dict_list is returned empty when no overlaps were identified
    '''
    
    suffixoverlap_results = {}
    
    concurrent_activations = params.get('concurrent_activations', True)
    
    int_headers = params.get('int_headers', False)
    
    minsplit = params.get('minsplit', 1)
    
    suffix = params.get('suffix', '_sbst')
    
    test_same_as_train = params.get('test_same_as_train', False)
    
    #first we find overlaps from mdf_train
    
    unique_list = list(map(str, mdf_train[column].unique()))
    
    #longest entries first, this order carries through to the order of returned columns
    unique_list = sorted(unique_list, key=len, reverse=True)
    
    #we'll populate overlap_dict as
    #{complete_entry : [that entry followed by the longer entries containing it]}
    overlap_dict = {}
    
    #unique is what we are searching for
    for unique in unique_list:
      
      if len(unique) < minsplit:
        
        continue
        
      #unique2 is where we are searching
      for unique2 in unique_list:
        
        #only strictly longer entries are searched, 
        #the membership test checks for unique at any position of unique2
        if len(unique2) > len(unique) and unique in unique2:
          
          if unique in overlap_dict:
            
            if unique2 not in overlap_dict[unique]:
              
              overlap_dict[unique].append(unique2)
              
          #else if we don't have a key for unique
          else:
            
            overlap_dict.update({unique : [unique, unique2]})
                    
    #now for mdf_test we'll only consider those overlaps already identified from train set
    #note that test_overlap_dict is only defined for test_same_as_train of True or False
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:
      
      unique_list_test = list(map(str, mdf_test[column].unique()))
      
      #each test entry is listed once under every train overlap it contains
      test_overlap_dict = {}
      
      for dict_key in overlap_dict:
        
        test_overlap_dict[dict_key] = \
        [unique_test for unique_test in unique_list_test if dict_key in unique_test]
                
    #populate a boolean activation column for each overlap
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + suffix + '_' + dict_key
      
      #df_copy_train is defined elsewhere in this class, as used here it returns
      #the train dataframe with newcolumn populated and the updated suffixoverlap_results
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
  
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = \
      {entry : column + suffix + '_' + str(i) for i, entry in enumerate(newcolumns)}
        
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
        
      #check the int convention headers for overlap prior to renaming
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      #now convert column headers from string to int convention
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      #maps the int header back to the string header with the column name 
      #and the following underscore stripped
      inverse_int_labels_dict = \
      {value : key[len(column) + 1:] for key, value in int_labels_dict.items()}
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    #now store the column_dict entries, this list remains empty when there are no newcolumns
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'overlap_dict' : overlap_dict, \
                                      'splt_newcolumns_sbst'   : newcolumns, \
                                      'minsplit' : minsplit, \
                                      'concurrent_activations' : concurrent_activations, \
                                      'preint_newcolumns' : preint_newcolumns, \
                                      'int_headers' : int_headers, \
                                      'int_labels_dict' : int_labels_dict, \
                                      'inverse_int_labels_dict' : inverse_int_labels_dict}}
      
      column_dict = {tc : {'category' : 'sbst', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_sbs3_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbs3_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #identifies cases where a full unique value of the train set is present
    #as a subset of a longer length unique value of the train set,
    #populates a temporary boolean activation column for each such case,
    #and then consolidates the set of activation columns into a binary encoding
    #returned in columns column + suffix + '_' + str(i)
    
    #this differs from other string parsing functions in that
    #only complete entries are checked for presence as subsets in other entries
    
    #sbs3 is comparable to sbst but with a returned binary encoding aggregation
    
    #accepts params entries:
    #concurrent_activations (default True): recorded in normalization_dict,
    #  the overlap groupings derived below are the same for either setting
    #int_headers (default False): when True the temporary activation columns
    #  are renamed to integer suffix convention prior to aggregation
    #minsplit (default 1): minimum character length of a unique value
    #  for it to be searched for within other unique values
    #suffix (default '_sbs3'): suffix appended to column for the returned headers
    #test_same_as_train (default False): when True the overlap groupings of the
    #  train set are applied to the test set as is, any other value derives
    #  the test set groupings by searching the test set unique values
    
    #returns mdf_train, mdf_test, column_dict_list
    #where column_dict_list is empty when no overlaps were found in the train set
    #(in which case no columns are added)
    '''
    
    suffixoverlap_results = {}
    
    concurrent_activations = params.get('concurrent_activations', True)
    int_headers = params.get('int_headers', False)
    minsplit = params.get('minsplit', 1)
    suffix = params.get('suffix', '_sbs3')
    test_same_as_train = params.get('test_same_as_train', False)
    
    #first we find overlaps from mdf_train
    #unique values are compared as strings, sorted from longest to shortest
    unique_list = sorted(map(str, mdf_train[column].unique()), key=len, reverse=True)
    
    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}
    #where the list starts with the extract itself
    overlap_dict = {}
    
    #unique is what we are searching for
    for unique in unique_list:
      
      if len(unique) < minsplit:
        continue
      
      #unique2 is where we are searching, only strictly longer entries are considered
      for unique2 in unique_list:
        
        if len(unique2) > len(unique) and unique in unique2:
          
          overlap_group = overlap_dict.setdefault(unique, [unique])
          
          #the str cast may have introduced duplicates in unique_list
          if unique2 not in overlap_group:
            overlap_group.append(unique2)
    
    #now for mdf_test we'll only consider those overlaps already identified from train set
    if test_same_as_train is True:
      
      test_overlap_dict = overlap_dict
      
    else:
      
      #any value other than True falls back to searching the test set,
      #so that test_overlap_dict is always defined
      unique_list_test = list(map(str, mdf_test[column].unique()))
      
      test_overlap_dict = \
      {dict_key : [unique_test for unique_test in unique_list_test if dict_key in unique_test] \
       for dict_key in overlap_dict}
    
    #populate one temporary activation column per overlap
    #(these are deleted further below after aggregation to the binary encoding)
    newcolumns = []
    
    for dict_key in overlap_dict:
      
      #note the temporary columns make use of the '_sbst_' naming
      newcolumn = column + '_sbst_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()
      
      mdf_train[newcolumn] = \
      mdf_train[newcolumn].astype(str).isin(overlap_dict[dict_key]).astype(np.int8)
      
      mdf_test[newcolumn] = \
      mdf_test[newcolumn].astype(str).isin(test_overlap_dict[dict_key]).astype(np.int8)
      
      newcolumns.append(newcolumn)
    
    preint_newcolumns = newcolumns.copy()
    
    if int_headers is True:
      
      int_labels_dict = \
      {entry : column + '_sbst_' + str(i) for i, entry in enumerate(newcolumns)}
      
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
      
      #now convert column headers from string to int convention
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)
      
      #maps int header to the original header with the leading column + '_' stripped
      inverse_int_labels_dict = \
      {value : key[len(column) + 1:] for key, value in int_labels_dict.items()}
      
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []
    
    #begin binary encoding of set, leaving the int_headers here out of convenience, not really needed
    
    if len(newcolumns) > 0:
      
      sbs3_column = column + suffix
      
      #aggregate collection of activations as string set
      #the prefix 'activations_' is to avoid potential of overlap with binary encoding and aggregated activations
      mdf_train[sbs3_column] = 'activations_'
      mdf_test[sbs3_column] = 'activations_'
      
      for entry in newcolumns:
        mdf_train[sbs3_column] = mdf_train[sbs3_column] + mdf_train[entry].astype(str)
        mdf_test[sbs3_column] = mdf_test[sbs3_column] + mdf_test[entry].astype(str)
      
      #extract the sorted activation sets found in train, these are what get encoded
      labels_train = sorted(mdf_train[sbs3_column].unique())
      
      #if infill not present in train set, insert
      #(this is the plug value for activation sets only found in test set)
      if 'zzzinfill' not in labels_train:
        labels_train = sorted(labels_train + ['zzzinfill'])
      
      #calculate number of columns we'll need
      binary_column_count = int(np.ceil(np.log2(len(labels_train))))
      
      #each label is assigned the zero padded binary representation (as a string)
      #of its position in the sorted labels
      binary_encoding_dict = \
      {label : format(i, f"0{binary_column_count}b") for i, label in enumerate(labels_train)}
      
      #new driftreport metric _1010_activations_dict
      #records the ratio of train set rows for each activation set
      _1010_activations_dict = {}
      for key in binary_encoding_dict:
        sumcalc = (mdf_train[sbs3_column] == key).sum() 
        ratio = sumcalc / mdf_train[sbs3_column].shape[0]
        _1010_activations_dict.update({key:ratio})
      
      #replace the categories in train set with their binary encoding strings
      mdf_train[sbs3_column] = mdf_train[sbs3_column].replace(binary_encoding_dict) 
      
      #in test set, we'll need to strike any categories that weren't present in train
      #so we'll just replace those items with our plug value
      testspecificcategories = list(set(mdf_test[sbs3_column].unique()) - set(labels_train))
      testplug_dict = dict.fromkeys(testspecificcategories, 'zzzinfill')
      mdf_test[sbs3_column] = mdf_test[sbs3_column].replace(testplug_dict)  
      
      #now we'll apply the 1010 transformation to the test set
      mdf_test[sbs3_column] = mdf_test[sbs3_column].replace(binary_encoding_dict)    
      
      #ok let's create a list of columns to store each entry of the binary encoding
      _1010_columnlist = [column + suffix + '_' + str(i) for i in range(binary_column_count)]
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)
      
      #now let's store the encoding, one character of the encoding string per column
      for i, _1010_column in enumerate(_1010_columnlist):
        
        mdf_train[_1010_column] = mdf_train[sbs3_column].str.slice(i,i+1).astype(np.int8)
        
        mdf_test[_1010_column] = mdf_test[sbs3_column].str.slice(i,i+1).astype(np.int8)
      
      #now delete the support column
      del mdf_train[sbs3_column]
      del mdf_test[sbs3_column]
      
      #and the temporary activation columns
      for entry in newcolumns:
        del mdf_train[entry]
        del mdf_test[entry]
      
      #now store the column_dict entries
      categorylist = _1010_columnlist
      
      for tc in categorylist:
        
        normalization_dict = {tc : {'suffix' : suffix, \
                                    'test_same_as_train' : test_same_as_train, \
                                    '_1010_binary_encoding_dict' : binary_encoding_dict, \
                                    '_1010_binary_column_count' : binary_column_count, \
                                    '_1010_activations_dict' : _1010_activations_dict, \
                                    'categorylist' : categorylist, \
                                    'overlap_dict' : overlap_dict, \
                                    'splt_newcolumns_sbs3'   : newcolumns, \
                                    'concurrent_activations' : concurrent_activations, \
                                    'minsplit' : minsplit, \
                                    'preint_newcolumns' : preint_newcolumns, \
                                    'int_headers' : int_headers, \
                                    'int_labels_dict' : int_labels_dict, \
                                    'inverse_int_labels_dict' : inverse_int_labels_dict}}
        
        column_dict = {tc : {'category' : 'sbs3', \
                             'origcategory' : category, \
                             'normalization_dict' : normalization_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}
        
        column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_srch_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_srch_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #each search term is checked for presence as a substring of each entry
    #by way of pandas str.contains (with regex=False)
    #and a column with boolean integer activations is returned per search term
    
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'
    
    #accepts params entries:
    #search (default []): list of search terms, a bare string is treated
    #  as a list with that single term
    #case (default True): passed to str.contains as the case parameter
    
    #note that search parameter can include lists of search terms embedded in the list
    #which embedded lists will be aggregated to a single activation
    #returned in the column of the last term of the embedded list
    #for example if we want single activation for female names could pass search = [['Ms.', 'Miss', 'Mrs']] etc
    #an empty embedded list is ignored, and terms repeated within an embedded list
    #(including repetitions of the last term) are only aggregated once
    
    #note this returns all zeros in a column if search value not found
    
    #note returned columns are named by search term
    #e.g. column + '_srch_' + str(search)
    
    #note that search terms are converted to strings and compared to columns cast as strings
    #the cast to string is applied to every entry, missing values receive no special handling here
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    search = params.get('search', [])
    case = params.get('case', True)
    
    #a bare string has no copy method and is not meant to be iterated by character
    if isinstance(search, str):
      search = [search]
    
    #we'll create mirror to account for any embedded lists of search terms for aggregation
    search_preflattening = search.copy()
    
    #search is repopulated as the flattened list of search terms
    search = []
    
    #aggregated_dict is populated as
    #{last term of embedded list : [other terms of that embedded list]}
    aggregated_dict = {}
    
    for entry in search_preflattening:
      
      if not isinstance(entry, list):
        search.append(str(entry))
        continue
      
      #nothing to search for in an empty embedded list
      if len(entry) == 0:
        continue
      
      aggregation_key = str(entry[-1])
      aggregation_targets = []
      
      for entry2 in entry[:-1]:
        search.append(entry2)
        target = str(entry2)
        #each target column is deleted after it is merged, so a target can only be
        #merged once, and the receiving column itself must not be a target
        if target != aggregation_key and target not in aggregation_targets:
          aggregation_targets.append(target)
      
      search.append(entry[-1])
      aggregated_dict[aggregation_key] = aggregation_targets
    
    #search_dict has key of returned column and value of search term
    search_dict = \
    {column + '_srch_' + str(searchitem) : str(searchitem) for searchitem in search}
    
    for newcolumn in search_dict:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, newcolumn, suffixoverlap_results)
      
      mdf_train[newcolumn] = \
      np.where(mdf_train[column].astype(str).str.contains(search_dict[newcolumn], case=case, regex=False), 1, 0)
      
      mdf_test[newcolumn] = \
      np.where(mdf_test[column].astype(str).str.contains(search_dict[newcolumn], case=case, regex=False), 1, 0)
    
    newcolumns = list(search_dict)
    
    #now we'll address any aggregations of search terms
    #from search parameter passed with embedded list of search terms
    
    #inverse_search_dict has key of search term and value of column for activations
    inverse_search_dict = {value:key for key,value in search_dict.items()}
    newcolumns_before_aggregation = newcolumns.copy()
    
    #now we consolidate activations
    #note that this only runs when aggregated_dict was populated with an embedded list of search terms
    for aggregated_dict_key in aggregated_dict:
      aggregated_dict_key_column = inverse_search_dict[aggregated_dict_key]
      
      for target_for_aggregation in aggregated_dict[aggregated_dict_key]:
        target_for_aggregation_column = inverse_search_dict[target_for_aggregation]
        
        #activations of the target are carried over to the receiving column
        mdf_train[aggregated_dict_key_column] = \
        np.where(mdf_train[target_for_aggregation_column] == 1, 1, mdf_train[aggregated_dict_key_column])
        mdf_test[aggregated_dict_key_column] = \
        np.where(mdf_test[target_for_aggregation_column] == 1, 1, mdf_test[aggregated_dict_key_column])
        
        del mdf_train[target_for_aggregation_column]
        del mdf_test[target_for_aggregation_column]
        
        newcolumns.remove(target_for_aggregation_column)
    
    for newcolumn in newcolumns:
      
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)
    
    #an empty newcolumns results in an empty column_dict_list
    column_dict_list = []
    
    for tc in newcolumns:
      
      textnormalization_dict = {tc : {'search_dict' : search_dict, \
                                      'inverse_search_dict' : inverse_search_dict, \
                                      'srch_newcolumns_srch'   : newcolumns, \
                                      'newcolumns_before_aggregation' : newcolumns_before_aggregation, \
                                      'search' : search, \
                                      'search_preflattening' : search_preflattening, \
                                      'aggregated_dict' : aggregated_dict, \
                                      'case' : case}}
      
      column_dict = {tc : {'category' : 'srch', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
      
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_src2_class(self, mdf_train, mdf_test, column, category, \
                        postprocess_dict, params = {}):
    """
    #process_src2_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #string parses unique entries of the train set to identify overlaps with search strings
    #when overlap found returns a column with boolean integer activation identifiers
    #named column + '_src2_' + search term
    #(no column is returned for a search term without overlap in the train set)
    
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'
    
    #accepts params entry search (default []): list of search terms,
    #a bare string is treated as a list with that single term
    #search terms are converted to strings and compared to entries cast as strings
    
    #search can include embedded lists of search terms
    #which are aggregated to a single activation column named for the last term of the embedded list
    #aggregation is only applied between terms that each had an overlap in the train set,
    #and the aggregated_dict recorded in normalization_dict only lists the aggregations performed
    
    #activations of the test set are based on the overlaps found in the train set
    #i.e. assumes that unique values of test set are same or subset of train set
    
    #the cast to string is applied to every entry, missing values receive no special handling here
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    search = params.get('search', [])
    
    #a bare string has no copy method and is not meant to be iterated by character
    if isinstance(search, str):
      search = [search]
    
    #unique values of the train set as strings, without duplicates from the cast
    unique_list = list(dict.fromkeys(map(str, mdf_train[column].unique())))
    
    #we'll create mirror to account for any embedded lists of search terms for aggregation
    search_preflattening = search.copy()
    
    #search is repopulated as the flattened list of search terms as strings
    search = []
    
    #aggregated_dict is populated as
    #{last term of embedded list : [other terms of that embedded list]}
    aggregated_dict = {}
    
    for entry in search_preflattening:
      
      if not isinstance(entry, list):
        search.append(str(entry))
        continue
      
      #nothing to search for in an empty embedded list
      if len(entry) == 0:
        continue
      
      aggregation_key = str(entry[-1])
      aggregation_targets = []
      
      for entry2 in entry[:-1]:
        target = str(entry2)
        search.append(target)
        #each target column is deleted after it is merged, so a target can only be
        #merged once, and the receiving column itself must not be a target
        if target != aggregation_key and target not in aggregation_targets:
          aggregation_targets.append(target)
      
      search.append(aggregation_key)
      aggregated_dict[aggregation_key] = aggregation_targets
    
    #we'll populate overlap_dict as
    #{search_string : [list of associate categories with that overlap found]}
    overlap_dict = \
    {search_string : [unique for unique in unique_list if search_string in unique] \
     for search_string in search}
    
    newcolumns = []
    
    #inverse_search_dict has key of search term and value of column for activations
    #populated alongside the columns so that terms without overlap can't shift the pairing
    inverse_search_dict = {}
    
    for dict_key in overlap_dict:
      
      if len(overlap_dict[dict_key]) == 0:
        continue
      
      newcolumn = column + '_src2_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()
      
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)
      
      #the test set makes use of the train set overlaps
      #(conversion to integers takes place after aggregation)
      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(overlap_dict[dict_key])
      
      newcolumns.append(newcolumn)
      inverse_search_dict[dict_key] = newcolumn
    
    newcolumns_before_aggregation = newcolumns.copy()
    
    #only terms with a populated column can take part in an aggregation
    #when the receiving term has no column its targets are left as their own columns
    aggregated_dict = \
    {aggregated_dict_key : [target for target in targets if target in inverse_search_dict] \
     for aggregated_dict_key, targets in aggregated_dict.items() \
     if aggregated_dict_key in inverse_search_dict}
    
    #now we consolidate activations
    #note that this only runs when aggregated_dict was populated with an embedded list of search terms
    for aggregated_dict_key in aggregated_dict:
      aggregated_dict_key_column = inverse_search_dict[aggregated_dict_key]
      
      for target_for_aggregation in aggregated_dict[aggregated_dict_key]:
        target_for_aggregation_column = inverse_search_dict[target_for_aggregation]
        
        #activations of the target are carried over to the receiving column
        mdf_train[aggregated_dict_key_column] = \
        np.where(mdf_train[target_for_aggregation_column] == 1, 1, mdf_train[aggregated_dict_key_column])
        mdf_test[aggregated_dict_key_column] = \
        np.where(mdf_test[target_for_aggregation_column] == 1, 1, mdf_test[aggregated_dict_key_column])
        
        del mdf_train[target_for_aggregation_column]
        del mdf_test[target_for_aggregation_column]
        
        newcolumns.remove(target_for_aggregation_column)
    
    for newcolumn in newcolumns:
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)
    
    #an empty newcolumns results in an empty column_dict_list
    column_dict_list = []
    
    for tc in newcolumns:
      
      textnormalization_dict = {tc : {'overlap_dict' : overlap_dict, \
                                      'src2_newcolumns_src2'   : newcolumns, \
                                      'newcolumns_before_aggregation' : newcolumns_before_aggregation, \
                                      'search' : search, \
                                      'inverse_search_dict' : inverse_search_dict, \
                                      'aggregated_dict' : aggregated_dict, \
                                      'search_preflattening' : search_preflattening}}
      
      column_dict = {tc : {'category' : 'src2', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
      
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_src3_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_src3_class(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #string parses unique entries to identify overlaps with search strings
    #when overlap found in the train set returns a column with boolean integer
    #activation identifiers named column + '_src3_' + search term
    #(no column is returned for a search term without overlap in the train set)
    
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'
    
    #accepts params entry search (default []): list of search terms,
    #a bare string is treated as a list with that single term
    #search terms are converted to strings and compared to entries cast as strings
    
    #activations of the test set are based on a search of the test set unique values
    #so values in test set not found in train set can still be activated
    
    #the cast to string is applied to every entry, missing values receive no special handling here
    
    #where srch is preferred for unbounded range of unique values
    #and src2 preferred when have bounded range of unique values for both train & test
    #and speculation is that src3 may be preferred when have a bounded
    #range of unique values but still want capacity to handle values in 
    #test set not found in train set
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    search = params.get('search', [])
    
    #a bare string is not meant to be iterated by character
    if isinstance(search, str):
      search = [search]
    
    #search terms are compared as strings, consistent with the cast applied to the entries
    search = [str(entry) for entry in search]
    
    #first we find overlaps from mdf_train
    #unique values as strings, without duplicates from the cast
    unique_list = list(dict.fromkeys(map(str, mdf_train[column].unique())))
    
    #we'll populate overlap_dict as
    #{search_string : [list of associate categories with that overlap found]}
    overlap_dict = \
    {search_string : [unique for unique in unique_list if search_string in unique] \
     for search_string in search}
    
    #now for mdf_test
    unique_list_test = list(dict.fromkeys(map(str, mdf_test[column].unique())))
    
    test_overlap_dict = \
    {search_string : [unique_test for unique_test in unique_list_test if search_string in unique_test] \
     for search_string in overlap_dict}
    
    newcolumns = []
    
    for dict_key in overlap_dict:
      
      #columns are only populated for search terms with overlap found in train set
      if len(overlap_dict[dict_key]) == 0:
        continue
      
      newcolumn = column + '_src3_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()
      
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)
      
      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)
      
      newcolumns.append(newcolumn)
    
    #an empty newcolumns results in an empty column_dict_list
    column_dict_list = []
    
    for tc in newcolumns:
      
      textnormalization_dict = {tc : {'overlap_dict' : overlap_dict, \
                                      'srch_newcolumns_src3'   : newcolumns, \
                                      'search' : search}}
      
      column_dict = {tc : {'category' : 'src3', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
      
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_src4_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_src4_class(mdf_train, mdf_test, column, category)
    #ordinal variant of the srch string search transform
    
    #accepts parameters
    #search as a list of search terms, where an embedded list of terms
    #has its activations aggregated into those of the embedded list's final term
    #case as the boolean passed to pandas str.contains (defaults to True)
    
    #the source column is cast to string and each search term (also cast to string)
    #is tested for inclusion with pandas str.contains using regex=False
    #for example with unique values ['west', 'north', 'northwest']
    #a search term 'west' activates entries 'west' and 'northwest'
    
    #activations are consolidated into a single returned column, column + '_src4'
    #integer encoded by position in the flattened search list starting at 1
    #with 0 reserved for entries without any activation
    #(so the returned column is all zeros when no search term is found)
    
    #when an entry activates for more than one search term
    #the term latest in the search list takes precedence
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    search = params.get('search', [])
    case = params.get('case', True)
    
    #keep the search parameter as received, then repopulate search as a flat list
    search_preflattening = search.copy()
    search = []
    aggregated_dict = {}
    
    for term in search_preflattening:
      if type(term) is list:
        #embedded list: all terms but the last are aggregated into the last
        final_term = str(term[-1])
        aggregated_dict[final_term] = [str(member) for member in term[:-1]]
        search.extend(term)
      else:
        search.append(str(term))
    
    target_column = column + '_src4'
    
    #search_dict has key of temporary activation column and value of search string
    search_dict = {column + '_src4_' + str(term) : str(term) for term in search}
    
    for activation_column, search_string in search_dict.items():
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, activation_column, suffixoverlap_results)
      
      for frame in (mdf_train, mdf_test):
        matches = frame[column].astype(str).str.contains(search_string, case=case, regex=False)
        frame[activation_column] = np.where(matches, 1, 0)
    
    newcolumns = list(search_dict)
    
    #ordinal encodings start at 1 since 0 is reserved for no activations
    ordl_dict1 = dict(enumerate(newcolumns, start=1))
    ordl_dict2 = {header : encoding for encoding, header in ordl_dict1.items()}
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, target_column, suffixoverlap_results)
    
    mdf_train[target_column] = 0
    mdf_test[target_column] = 0
    
    #later search terms overwrite earlier ones, then the temporary column is dropped
    for activation_column in newcolumns:
      encoding = ordl_dict2[activation_column]
      for frame in (mdf_train, mdf_test):
        frame[target_column] = \
        np.where(frame[activation_column] == 1, encoding, frame[target_column])
        del frame[activation_column]
    
    #inverse_search_dict has key of search string and value of activation column
    inverse_search_dict = {search_string : header for header, search_string in search_dict.items()}
    
    #consolidate encodings for any embedded lists of search terms
    #(only runs when aggregated_dict was populated)
    for final_term, members in aggregated_dict.items():
      final_encoding = ordl_dict2[inverse_search_dict[final_term]]
      
      for member in members:
        member_encoding = ordl_dict2[inverse_search_dict[member]]
        
        for frame in (mdf_train, mdf_test):
          frame[target_column] = \
          np.where(frame[target_column] == member_encoding, final_encoding, frame[target_column])
    
    #integer type is based on the number of ordinal entries
    encoding_count = len(ordl_dict1)
    if encoding_count < 254:
      integer_type = np.uint8
    elif encoding_count < 65530:
      integer_type = np.uint16
    else:
      integer_type = np.uint32
    
    mdf_train[target_column] = mdf_train[target_column].astype(integer_type)
    mdf_test[target_column] = mdf_test[target_column].astype(integer_type)
    
    #newcolumns are the temporary srch style activation columns
    #src4_newcolumns is the single column returned after ordinal consolidation
    src4_newcolumns = [target_column]
    
    textnormalization_dict = {target_column : {'search_dict' : search_dict, \
                                               'inverse_search_dict' : inverse_search_dict, \
                                               'srch_newcolumns_src4' : newcolumns, \
                                               'src4_newcolumns' : src4_newcolumns, \
                                               'search' : search, \
                                               'search_preflattening' : search_preflattening, \
                                               'aggregated_dict' : aggregated_dict, \
                                               'case' : case, \
                                               'ordl_dict1' : ordl_dict1, \
                                               'activations_list' : list(ordl_dict1), \
                                               'ordl_dict2' : ordl_dict2}}
    
    column_dict = {target_column : {'category' : 'src4', \
                                    'origcategory' : category, \
                                    'normalization_dict' : textnormalization_dict, \
                                    'origcolumn' : column, \
                                    'inputcolumn' : column, \
                                    'columnslist' : src4_newcolumns, \
                                    'categorylist' : src4_newcolumns, \
                                    'infillmodel' : False, \
                                    'infillcomplete' : False, \
                                    'suffixoverlap_results' : suffixoverlap_results, \
                                    'deletecolumn' : False}}
    
    column_dict_list = [column_dict]

    return mdf_train, mdf_test, column_dict_list
  
  def process_aggt_class(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_aggt_class(df, column, category, postprocess_dict)
    #copies column into column + '_aggt' and consolidates differently spelled
    #duplicates into a single representation
    
    #accepts parameter 'aggregate' as a list of lists
    #where each sublist is an aggregation group
    #and every entry of a group is replaced by the group's final item
    #aggregate may also be passed as a single flat list of terms
    #in which case the flat list is treated as one aggregation group
    
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    aggregate = params.get('aggregate', [[]])
    
    aggt_column = column + '_aggt'
    
    df, suffixoverlap_results = \
    self.df_copy_train(df, column, aggt_column, suffixoverlap_results)
    
    #assemble the aggregation groups in the order they will be applied
    #the first entry that is not a list signals a flat aggregate,
    #which is applied as a whole and ends the traversal
    groups = []
    for candidate in aggregate:
      if isinstance(candidate, list):
        groups.append(candidate)
      else:
        groups.append(aggregate)
        break
    
    for group in groups:
      
      replacement_count = len(group) - 1
      
      for position in range(replacement_count):
        
        df[aggt_column] = np.where(df[aggt_column] == group[position], group[-1], df[aggt_column])
    
    nmbrcolumns = [aggt_column]
    
    normalization_dict = {aggt_column : {'aggregate' : aggregate}}
    
    #populate the data structure returned for the new column
    column_dict_list = []
    
    for header in nmbrcolumns:
      
      column_dict_list.append({header : {'category' : 'aggt', \
                                         'origcategory' : category, \
                                         'normalization_dict' : normalization_dict, \
                                         'origcolumn' : column, \
                                         'inputcolumn' : column, \
                                         'columnslist' : nmbrcolumns, \
                                         'categorylist' : nmbrcolumns, \
                                         'infillmodel' : False, \
                                         'infillcomplete' : False, \
                                         'suffixoverlap_results' : suffixoverlap_results, \
                                         'deletecolumn' : False}})
        
    return df, column_dict_list

  def process_strn_class(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_strn_class(df, column, category, postprocess_dict)
    #parses the unique entries of column (cast to string) and returns in
    #column + '_strn' the longest partition of each entry in which no character
    #is recognized by self.is_number
    #when more than one partition shares the longest length the leftmost is used
    #entries without any such character are replaced with the string 'zzzinfill'
    
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    strn_column = column + '_strn'
    
    unique_list = [str(entry) for entry in df[column].unique()]
    
    maxlength = max(len(entry) for entry in unique_list)
    
    #overlap_dict has key of unique entry and value of extracted partition
    overlap_dict = {}
    
    #partition lengths are walked from longest to shortest
    #so the first partition accepted for an entry is its longest
    for window in range(maxlength, 0, -1):
      
      for entry in unique_list:
        
        if entry in overlap_dict or len(entry) < window:
          continue
        
        for start in range(len(entry) - window + 1):
          
          partition = entry[start:(window + start)]
          
          if not any(self.is_number(character) for character in partition):
            
            overlap_dict[entry] = partition
            break
        
        else:
          
          #no partition accepted at this length
          #at length 1 that means every character was numeric
          if window == 1:
            
            overlap_dict[entry] = np.nan
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, strn_column, suffixoverlap_results)
    
    #entries mapped to nan by overlap_dict receive the 'zzzinfill' plug value
    df[strn_column] = \
    df[column].astype(str).replace(overlap_dict).fillna('zzzinfill')
    
    #create list of columns
    nmbrcolumns = [strn_column]

    nmbrnormalization_dict = {strn_column : {'overlap_dict' : overlap_dict}}

    #populate the data structure returned for the new column
    column_dict_list = []
    
    for header in nmbrcolumns:

      column_dict_list.append({header : {'category' : 'strn', \
                                         'origcategory' : category, \
                                         'normalization_dict' : nmbrnormalization_dict, \
                                         'origcolumn' : column, \
                                         'inputcolumn' : column, \
                                         'columnslist' : nmbrcolumns, \
                                         'categorylist' : nmbrcolumns, \
                                         'infillmodel' : False, \
                                         'infillcomplete' : False, \
                                         'suffixoverlap_results' : suffixoverlap_results, \
                                         'deletecolumn' : False}})
        
    return df, column_dict_list

  def process_strg_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_strg_class(df, column, category, postprocess_dict)
    #copies column into column + '_strg' and casts the entries to string
    #intended for integer categoric sets, such as from an ordinal transform,
    #so that downstream libraries recognize the set as categoric
    #(eg some libraries treat integer label sets as regression targets)
    #no infill is performed, entries are only converted to string
    
    #returns df, column_dict_list
    '''
    
    new_header = column + '_strg'
    
    df, suffixoverlap_results = \
    self.df_copy_train(df, column, new_header, {})
    
    df[new_header] = df[new_header].astype(str)
    
    #this transform has no normalization parameters to record
    properties = {'category' : 'strg', \
                  'origcategory' : category, \
                  'normalization_dict' : {new_header : {}}, \
                  'origcolumn' : column, \
                  'inputcolumn' : column, \
                  'columnslist' : [new_header], \
                  'categorylist' : [new_header], \
                  'infillmodel' : False, \
                  'infillcomplete' : False, \
                  'suffixoverlap_results' : suffixoverlap_results, \
                  'deletecolumn' : False}
    
    column_dict_list = [{new_header : properties}]

    return df, column_dict_list

  def process_nmrc_class(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_nmrc_class(df, column, category, postprocess_dict)
    #parses the unique entries of column (cast to string) and returns in
    #column + suffix the number found in the longest numeric partition of each entry
    #when more than one partition shares the longest length the leftmost is used
    #entries without a numeric partition receive infill with the mean of the
    #extracted values (or 0 when that mean is nan)
    
    #accepts parameters
    #convention as 'numbers' / 'commas' / 'spaces', which selects between
    #self.is_number / self.is_number_comma / self.is_number_EU
    #for validating partitions longer than one character (defaults to 'numbers')
    #suffix as the string appended to column for the returned header (defaults to '_nmrc')
    
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    #accepts numbers/commas/spaces
    convention = params.get('convention', 'numbers')
    
    #accepts string for suffix appender
    suffix = params.get('suffix', '_nmrc')
    
    nmrc_column = column + suffix
    
    df, suffixoverlap_results = \
    self.df_copy_train(df, column, nmrc_column, suffixoverlap_results)
    
    def extract_number(partition):
      #returns the float for a multi character partition under the convention
      #or None when the partition is not accepted as a number
      if convention == 'numbers':
        if self.is_number(partition):
          return float(partition)
      elif convention == 'commas':
        if self.is_number_comma(partition):
          return float(partition.replace(',',''))
      elif convention == 'spaces':
        if self.is_number_EU(partition):
          return float(partition[0] + partition[1:-1].replace(' ','').replace('.','').replace(',','.') + partition[-1])
      return None
    
    unique_list = [str(entry) for entry in df[nmrc_column].unique()]
    
    maxlength = max(len(entry) for entry in unique_list)
    
    #overlap_dict has key of unique entry and value of extracted number
    overlap_dict = {}
    
    #partition lengths are walked from longest to shortest
    #so the first partition accepted for an entry is its longest
    for window in range(maxlength, 0, -1):
      
      for entry in unique_list:
        
        if entry in overlap_dict or len(entry) < window:
          continue
        
        starts = range(len(entry) - window + 1)
        
        if window > 1:
          
          for start in starts:
            
            number = extract_number(entry[start:(window + start)])
            
            if number is not None:
              
              overlap_dict[entry] = number
              break
        
        #else if window == 1, single characters are checked with is_number
        else:
          
          for start in starts:
            
            character = entry[start:(window + start)]
            
            if self.is_number(character):
              
              overlap_dict[entry] = float(character)
              break
          
          else:
            
            #no numeric character anywhere in the entry
            overlap_dict[entry] = np.nan
    
    df[nmrc_column] = \
    pd.to_numeric(df[nmrc_column].astype(str).replace(overlap_dict), errors='coerce')
    
    #get mean of the extracted values, falling back to 0 when the mean is nan
    mean = df[nmrc_column].mean()
    mean = 0 if mean != mean else mean
    
    #replace missing data with the mean as default infill
    df[nmrc_column] = df[nmrc_column].fillna(mean)
    
    #a few more metrics collected for driftreport
    maximum = df[nmrc_column].max()
    minimum = df[nmrc_column].min()
    
    #create list of columns
    nmbrcolumns = [nmrc_column]

    #populate data structures
    nmbrnormalization_dict = {nmrc_column : {'overlap_dict' : overlap_dict, \
                                             'mean' : mean, \
                                             'maximum' : maximum, \
                                             'minimum' : minimum, \
                                             'convention' : convention, \
                                             'suffix' : suffix }}
    
    column_dict_list = []
    
    for header in nmbrcolumns:

      column_dict_list.append({header : {'category' : 'nmrc', \
                                         'origcategory' : category, \
                                         'normalization_dict' : nmbrnormalization_dict, \
                                         'origcolumn' : column, \
                                         'inputcolumn' : column, \
                                         'columnslist' : nmbrcolumns, \
                                         'categorylist' : nmbrcolumns, \
                                         'infillmodel' : False, \
                                         'infillcomplete' : False, \
                                         'suffixoverlap_results' : suffixoverlap_results, \
                                         'deletecolumn' : False}})
    
    return df, column_dict_list

  def process_nmr4_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_nmr4_class(mdf_train, mdf_test, column, category)
    #extracts numeric partitions from categoric entries into column + suffix
    #where the test set may be treated differently than the train set
    
    #each unique entry (cast to string) is mapped to the number found in its
    #longest numeric partition, with the leftmost used when lengths tie
    #entries without a numeric partition receive infill with the mean of the
    #train set extracted values (or 0 when that mean is nan)
    
    #accepts parameters
    #convention as 'numbers' / 'commas' / 'spaces', which selects between
    #self.is_number / self.is_number_comma / self.is_number_EU
    #for validating partitions longer than one character (defaults to 'numbers')
    #suffix as the string appended to column for the returned header (defaults to '_nmr4')
    #test_same_as_train as True/False (defaults to True)
    #where True only applies the train set overlap_dict to the test set
    #(test entries not found in train receive infill)
    #and False additionally parses test entries not found in train
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    #accepts numbers/commas/spaces
    convention = params.get('convention', 'numbers')
    
    #accepts string for suffix appender
    suffix = params.get('suffix', '_nmr4')
    
    #accepts boolean
    test_same_as_train = params.get('test_same_as_train', True)
    
    nmrc_column = column + suffix
    
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, nmrc_column, suffixoverlap_results)
    
    mdf_test[nmrc_column] = mdf_test[column].copy()
    
    def extract_number(partition):
      #returns the float for a multi character partition under the convention
      #or None when the partition is not accepted as a number
      if convention == 'numbers':
        if self.is_number(partition):
          return float(partition)
      elif convention == 'commas':
        if self.is_number_comma(partition):
          return float(partition.replace(',',''))
      elif convention == 'spaces':
        if self.is_number_EU(partition):
          return float(partition[0] + partition[1:-1].replace(' ','').replace('.','').replace(',','.') + partition[-1])
      return None
    
    def populate_overlaps(entries, longest, parsed):
      #populates parsed in place with key of entry and value of extracted number
      #partition lengths are walked from longest to shortest
      #so the first partition accepted for an entry is its longest
      #entries already present in parsed are left untouched
      for window in range(longest, 0, -1):
        
        for entry in entries:
          
          if entry in parsed or len(entry) < window:
            continue
          
          starts = range(len(entry) - window + 1)
          
          if window > 1:
            
            for start in starts:
              
              number = extract_number(entry[start:(window + start)])
              
              if number is not None:
                
                parsed[entry] = number
                break
          
          #else if window == 1, single characters are checked with is_number
          else:
            
            for start in starts:
              
              character = entry[start:(window + start)]
              
              if self.is_number(character):
                
                parsed[entry] = float(character)
                break
            
            else:
              
              #no numeric character anywhere in the entry
              parsed[entry] = np.nan
    
    #begin parsing train set
    
    unique_list = list(mdf_train[nmrc_column].unique())
    
    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_dict = {}
    
    populate_overlaps(unique_list, maxlength, overlap_dict)
                
    mdf_train[nmrc_column] = mdf_train[nmrc_column].astype(str)
    mdf_train[nmrc_column] = mdf_train[nmrc_column].replace(overlap_dict)
    
    #now test set
    test_unique_list = list(mdf_test[nmrc_column].unique())
    test_unique_list = list(map(str, test_unique_list))
    extra_test_unique = list(set(test_unique_list) - set(unique_list))

    #the train set dictionary itself is returned unaltered in the normalization_dict
    test_overlap_dict = deepcopy(overlap_dict)
    
    if test_same_as_train is True:
      
      #test entries not found in train are subject to infill
      for test_unique in extra_test_unique:
        test_overlap_dict.update({str(test_unique) : np.nan})
      
    elif test_same_as_train is False:
      
      #the longest partition length is based on the test entries being parsed
      #(basing it on the train set entries truncated the number extracted from
      #any test entry longer than every train entry)
      #default=0 covers the case of no test entries missing from train
      #NOTE(review): confirm the corresponding postprocess function sizes
      #its partitions the same way so test data is encoded consistently
      testmaxlength = max((len(x) for x in extra_test_unique), default=0)
      
      populate_overlaps(extra_test_unique, testmaxlength, test_overlap_dict)
    
    #great now that test_overlap_dict is populated
    mdf_test[nmrc_column] = mdf_test[nmrc_column].astype(str)
    mdf_test[nmrc_column] = mdf_test[nmrc_column].replace(test_overlap_dict)

    mdf_train[nmrc_column] = pd.to_numeric(mdf_train[nmrc_column], errors='coerce')
    mdf_test[nmrc_column] = pd.to_numeric(mdf_test[nmrc_column], errors='coerce')

    #get mean of training data, falling back to 0 when the mean is nan
    mean = mdf_train[nmrc_column].mean()
    if mean != mean:
      mean = 0

    #replace missing data with training set mean as default infill
    mdf_train[nmrc_column] = mdf_train[nmrc_column].fillna(mean)
    mdf_test[nmrc_column] = mdf_test[nmrc_column].fillna(mean)
    
    #a few more metrics collected for driftreport
    maximum = mdf_train[nmrc_column].max()
    minimum = mdf_train[nmrc_column].min()
    
    #create list of columns
    nmbrcolumns = [nmrc_column]
    
    #populate data structures
    nmbrnormalization_dict = {nmrc_column : {'overlap_dict' : overlap_dict, \
                                            'mean' : mean, \
                                            'maximum' : maximum, \
                                            'minimum' : minimum, \
                                            'unique_list' : unique_list, \
                                            'maxlength' : maxlength, \
                                            'convention' : convention, \
                                            'suffix' : suffix, \
                                            'test_same_as_train' : test_same_as_train}}
    
    column_dict_list = []
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'nmr4', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    return mdf_train, mdf_test, column_dict_list
  
  def process_ordl_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ordl_class(mdf_train, mdf_test, column, category)
    #preprocess column with categories into ordinal (sequentuial integer) sets
    #corresponding to (sorted) categories
    #adresses infill with new point which we arbitrarily set as 'zzzinfill'
    #intended to show up as last point in set alphabetically
    #for categories presetn in test set not present in train set use this 'zzz' category
    #as implemented this function seperately encodes numbers and string equivalent (eg 2 != '2')
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #adjinfill accepts True/False to change default infill from mean inputation to adjacent cell
    if 'adjinfill' in params:
      adjinfill = params['adjinfill']
    else:
      adjinfill = False
      
    #ordered_overide is boolean to indicate if order of integer encoding basis will 
    #defer to cases when a column is a pandas categorical ordered set
    if 'ordered_overide' in params:
      ordered_overide = params['ordered_overide']
    else:
      ordered_overide = True
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, column + '_ordl', suffixoverlap_results)

      mdf_test[column + '_ordl'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, column + '_ordl', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_ordl'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_ordl'}, inplace = True)
      
    ordered = False
    if ordered_overide:
      if mdf_train[column + '_ordl'].dtype.name == 'category':
        if mdf_train[column + '_ordl'].cat.ordered:
          ordered = True
          labels_train = list(mdf_train[column + '_ordl'].cat.categories)
          if mdf_test[column + '_ordl'].dtype.name == 'category':
            if mdf_test[column + '_ordl'].cat.ordered:
              labels_test = list(mdf_test[column + '_ordl'].cat.categories)
            else:
              ordered = False
          else:
            ordered = False
    
    #convert column to category if it isn't already
    mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype('category')
    mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[column + '_ordl'].cat.categories:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[column + '_ordl'].cat.categories:
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].cat.add_categories(['zzzinfill'])
      
    if adjinfill is True:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].fillna(method='ffill')
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].fillna(method='ffill')
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].fillna(method='bfill')
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].fillna(method='bfill')

    #replace NA with a dummy variable
    mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].fillna('zzzinfill')
    mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].fillna('zzzinfill')

    #replace numerical with string equivalent
    if str_convert is True:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype(str)
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype(str)
      if ordered is True:
        labels_train = [str(x) for x in labels_train]
        labels_test = [str(x) for x in labels_test]
    else:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype('object')
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype('object')
            
    if ordered is False:
      
      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = list(mdf_train[column + '_ordl'].unique())
      labels_train = sorted(labels_train, key=str)
      labels_test = list(mdf_test[column + '_ordl'].unique())
      labels_test = sorted(labels_test, key=str)

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
#       labels_train.sort()
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
#       labels_test.sort()
    
    listlength = len(labels_train)
    
    #____
    #quick check if there are any overlaps between binary encodings and prior unique values in the column
    #as would interfere with the replacement operation
    
    overlap_list = []
    overlap_replace = {}
    for value in labels_train:
      if value in range(listlength):
        overlap_list.append(value)
        
        #here's what we'll replace with, the string suffix is arbitrary and intended as not likely to be in set
        overlap_replace.update({value : str(value) + 'encoding_overlap'})
    
    #here we replace the overlaps with version with jibberish suffix
    if len(overlap_list) > 0:
      
      #then we'll redo the encodings
      
      if ordered is True:
        #this replaces entries with overlap while retaining order
        for foundoverlap in overlap_replace:
          labels_train = [overlap_replace[foundoverlap] if x == foundoverlap else x for x in labels_train]
          labels_test = [overlap_replace[foundoverlap] if x == foundoverlap else x for x in labels_test]
          
        #then replace encoding overlap entries in the returned column

        mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].replace(overlap_replace)
        mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].replace(overlap_replace)

      if ordered is False:

        mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].replace(overlap_replace)
        mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].replace(overlap_replace)

        #extract categories for column labels
        #note that .unique() extracts the labels as a numpy array
        labels_train = list(mdf_train[column + '_ordl'].unique())
        labels_train = sorted(labels_train, key=str)
        labels_test = list(mdf_test[column + '_ordl'].unique())
        labels_test = sorted(labels_test, key=str)

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
      
    #clear up memory
    del overlap_list
    
    #____
    
    #get length of the list, then zip a dictionary from list and range(length)
    #the range values will be our ordinal points to replace the categories
    listlength = len(labels_train)
    ordinal_dict = dict(zip(labels_train, range(listlength)))
    
    #dtype operation is to address edge case if object type drifted to numeric which impacts replace
    if mdf_train[column + '_ordl'].dtype.name != 'object':
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype('object')
    
    #replace the cateogries in train set via ordinal trasnformation
    mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].replace(ordinal_dict)
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let'/s identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    if mdf_test[column + '_ordl'].dtype.name != 'object':
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype('object')
    mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].replace(testplug_dict)
    
    #now we'll apply the ordinal transformation to the test set
    if mdf_test[column + '_ordl'].dtype.name != 'object':
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype('object')
    mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].replace(ordinal_dict)
    
    #just want to make sure these arent' being saved as floats for memory considerations
    if len(ordinal_dict) < 254:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype(np.uint8)
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype(np.uint8)
    elif len(ordinal_dict) < 65530:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype(np.uint16)
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype(np.uint16)
    else:
      mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype(np.uint32)
      mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype(np.uint32)
    
#     #convert column to category
#     mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype('category')
#     mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype('category')

#     #change data type for memory savings
#     mdf_train[column + '_ordl'] = mdf_train[column + '_ordl'].astype(np.int32)
#     mdf_test[column + '_ordl'] = mdf_test[column + '_ordl'].astype(np.int32)

    #new driftreport metric ordl_activations_dict
    ordl_activations_dict = {}
    for key in ordinal_dict:
      sumcalc = (mdf_train[column+'_ordl'] == ordinal_dict[key]).sum() 
      ratio = sumcalc / mdf_train[column+'_ordl'].shape[0]
      ordl_activations_dict.update({key:ratio})

    inverse_ordinal_dict = {value:key for key,value in ordinal_dict.items()}
    activations_list = list(inverse_ordinal_dict)
    
    categorylist = [column + '_ordl']  
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'ordinal_dict' : ordinal_dict, \
                                  'inverse_ordinal_dict' : inverse_ordinal_dict, \
                                  'activations_list' : activations_list, \
                                  'ordinal_overlap_replace' : overlap_replace, \
                                  'ordl_activations_dict' : ordl_activations_dict, \
                                  'adjinfill' : adjinfill, \
                                  'ordered_overide' : ordered_overide, \
                                  'ordered' : ordered, \
                                  'str_convert' : str_convert}}
    
      column_dict = {tc : {'category' : 'ordl', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_ord3_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ord3_class(mdf_train, mdf_test, column, category)
    #preprocess column with categories into ordinal (sequential integer) sets
    #returned in the column column + '_ord3'
    #
    #integers are assigned to the train set categories sorted by frequency of occurrence
    #(most frequent first, ties broken by ascending label sort)
    #exception is when ordered_overide is active and both the train and test columns are
    #pandas ordered categoricals, in which case the train category order is the encoding order
    #
    #missing values are replaced with the plug value 'zzzinfill'
    #(or with an adjacent cell's value when adjinfill is True)
    #categories present in test set and not present in train set are also mapped to 'zzzinfill'
    #when 'zzzinfill' does not occur in the train set it is appended as the last encoding
    #
    #accepts params:
    #inplace (default False): True renames the source column instead of copying it
    #adjinfill (default False): True applies forward fill then backward fill to missing values
    #ordered_overide (default True): defer to category order of pandas ordered categoricals
    #str_convert (default False): True encodes numbers and string equivalent alike (eg 2 == '2'),
    #False encodes them seperately (eg 2 != '2')
    #
    #postprocess_dict is accepted for signature consistency and not referenced here
    #
    #returns mdf_train, mdf_test, column_dict_list
    #where column_dict_list has one column_dict for the returned column
    #whose normalization_dict stores ordinal_dict, inverse_ordinal_dict, activations_list,
    #ordinal_overlap_replace, ordl_activations_dict and the applied parameters
    '''
    
    suffixoverlap_results = {}
    
    #params is only read in this function, so the shared default dictionary is not mutated
    inplace = params.get('inplace', False)
    
    #adjinfill accepts True/False to change default infill from 'zzzinfill' plug value to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    #ordered_overide is boolean to indicate if order of integer encoding basis will 
    #defer to cases when a column is a pandas categorical ordered set
    ordered_overide = params.get('ordered_overide', True)
    
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    str_convert = params.get('str_convert', False)
    
    ord3_column = column + '_ord3'
    
    def _labels_by_frequency(series):
      #returns the unique entries of series sorted by count (descending) then by label (ascending)
      #the count series name and index name are set explicitly since the names returned
      #by value_counts differ between pandas versions (changed in pandas 2.0),
      #which otherwise breaks the sort_values by column name
      counts = series.value_counts().rename(ord3_column).rename_axis('zzzinfill')
      counts = pd.DataFrame(counts).sort_values(by = [ord3_column, 'zzzinfill'], ascending = [False, True])
      return list(counts.index)
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, ord3_column, suffixoverlap_results)

      mdf_test[ord3_column] = mdf_test[column].copy()
    
    else:
      
      #check for suffix overlap prior to renaming the source column in place
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, ord3_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : ord3_column}, inplace = True)
      mdf_test.rename(columns = {column : ord3_column}, inplace = True)
    
    #ordered is only activated when both train and test are pandas ordered categoricals
    #in which case the labels are taken from the category order instead of frequency
    ordered = False
    if ordered_overide:
      train_col = mdf_train[ord3_column]
      test_col = mdf_test[ord3_column]
      if train_col.dtype.name == 'category' and train_col.cat.ordered \
      and test_col.dtype.name == 'category' and test_col.cat.ordered:
        ordered = True
        labels_train = list(train_col.cat.categories)
        labels_test = list(test_col.cat.categories)
    
    #convert column to category if it isn't already
    mdf_train[ord3_column] = mdf_train[ord3_column].astype('category')
    mdf_test[ord3_column] = mdf_test[ord3_column].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[ord3_column].cat.categories:
      mdf_train[ord3_column] = mdf_train[ord3_column].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[ord3_column].cat.categories:
      mdf_test[ord3_column] = mdf_test[ord3_column].cat.add_categories(['zzzinfill'])
      
    if adjinfill is True:
      #forward fill then backward fill
      #(ffill / bfill methods are equivalent to the deprecated fillna(method=...) form)
      mdf_train[ord3_column] = mdf_train[ord3_column].ffill()
      mdf_test[ord3_column] = mdf_test[ord3_column].ffill()
      mdf_train[ord3_column] = mdf_train[ord3_column].bfill()
      mdf_test[ord3_column] = mdf_test[ord3_column].bfill()

    #replace any remaining NA with the plug value
    mdf_train[ord3_column] = mdf_train[ord3_column].fillna('zzzinfill')
    mdf_test[ord3_column] = mdf_test[ord3_column].fillna('zzzinfill')
    
    if str_convert is True:
      #replace numerical with string equivalent (this operation changes dtype from category to object)
      mdf_train[ord3_column] = mdf_train[ord3_column].astype(str)
      mdf_test[ord3_column] = mdf_test[ord3_column].astype(str)
      if ordered is True:
        labels_train = [str(x) for x in labels_train]
        labels_test = [str(x) for x in labels_test]
    else:
      mdf_train[ord3_column] = mdf_train[ord3_column].astype('object')
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
            
    if ordered is False:
      
      #extract categories for column labels
      #with values sorted by frequency of occurance from most to least
      labels_train = _labels_by_frequency(mdf_train[ord3_column])
      
      #order of test labels is not significant, they are only used to identify
      #entries that were not present in the train set
      labels_test = list(mdf_test[ord3_column].unique())

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
    
    #____
    #quick check if there are any overlaps between the integer encodings and prior unique values in the column
    #as would interfere with the replacement operation
    
    encoding_range = range(len(labels_train))
    overlap_replace = {}
    for value in labels_train:
      if value in encoding_range:
        #here's what we'll replace with, the string suffix is arbitrary and intended as not likely to be in set
        overlap_replace.update({value : str(value) + 'encoding_overlap'})
    
    #here we replace the overlaps with version with jibberish suffix
    if overlap_replace:
      
      #replace encoding overlap entries in the returned column
      mdf_train[ord3_column] = mdf_train[ord3_column].replace(overlap_replace)
      mdf_test[ord3_column] = mdf_test[ord3_column].replace(overlap_replace)
      
      if ordered is True:
        #this replaces entries with overlap while retaining order
        labels_train = [overlap_replace.get(x, x) for x in labels_train]
        labels_test = [overlap_replace.get(x, x) for x in labels_test]

      else:
        #then we'll redo the label extraction from the updated column
        labels_train = _labels_by_frequency(mdf_train[ord3_column])
        labels_test = list(mdf_test[ord3_column].unique())
        
      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
    
    #____
    
    #zip a dictionary from list and range(length)
    #the range values will be our ordinal points to replace the categories
    listlength = len(labels_train)
    ordinal_dict = dict(zip(labels_train, range(listlength)))
    
    #there is an edge case for replace operation if dtype drifted from object such as to numeric
    if mdf_train[ord3_column].dtype.name != 'object':
      mdf_train[ord3_column] = mdf_train[ord3_column].astype('object')
    
    #replace the categories in train set via ordinal transformation
    mdf_train[ord3_column] = mdf_train[ord3_column].replace(ordinal_dict)
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let's identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    if mdf_test[ord3_column].dtype.name != 'object':
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
    mdf_test[ord3_column] = mdf_test[ord3_column].replace(testplug_dict)
    
    #now we'll apply the ordinal transformation to the test set
    #(dtype is rechecked since the prior replace may have changed it)
    if mdf_test[ord3_column].dtype.name != 'object':
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
    mdf_test[ord3_column] = mdf_test[ord3_column].replace(ordinal_dict)
    
    #just want to make sure these aren't being saved as floats for memory considerations
    #smallest unsigned integer type is selected based on the number of encodings
    if len(ordinal_dict) < 254:
      encoding_dtype = np.uint8
    elif len(ordinal_dict) < 65530:
      encoding_dtype = np.uint16
    else:
      encoding_dtype = np.uint32
    mdf_train[ord3_column] = mdf_train[ord3_column].astype(encoding_dtype)
    mdf_test[ord3_column] = mdf_test[ord3_column].astype(encoding_dtype)

    #new driftreport metric ordl_activations_dict
    #records for each category the ratio of train set rows with that encoding
    ordl_activations_dict = {}
    for key in ordinal_dict:
      sumcalc = (mdf_train[ord3_column] == ordinal_dict[key]).sum() 
      ratio = sumcalc / mdf_train[ord3_column].shape[0]
      ordl_activations_dict.update({key:ratio})

    inverse_ordinal_dict = {value:key for key,value in ordinal_dict.items()}
    activations_list = list(inverse_ordinal_dict)
    
    categorylist = [ord3_column]  
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'ordinal_dict' : ordinal_dict, \
                                  'inverse_ordinal_dict' : inverse_ordinal_dict, \
                                  'activations_list' : activations_list, \
                                  'ordinal_overlap_replace' : overlap_replace, \
                                  'ordl_activations_dict' : ordl_activations_dict, \
                                  'adjinfill' : adjinfill, \
                                  'ordered_overide' : ordered_overide, \
                                  'ordered' : ordered, \
                                  'str_convert' : str_convert}}
    
      column_dict = {tc : {'category' : 'ord3', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_ucct_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ucct_class(mdf_train, mdf_test, column, category)
    #preprocess column with categories into unique class count sets
    #normalized by total row count, returned in the column column + '_ucct'
    #e.g. for each class in train set, 
    #counts instances and divides by total train set row count
    #(so values will fall in range 0-1)
    #test sets receive comparable encoding based on the train set counts
    #
    #entries are converted to strings prior to counting (so eg 2 and '2' are the same class)
    #missing values are replaced with the plug value 'zzzinfill'
    #classes present in test set and not present in train set are also mapped to 'zzzinfill'
    #when 'zzzinfill' does not occur in the train set its encoding is 0
    #
    #postprocess_dict and params are accepted for signature consistency and not referenced here
    #
    #returns mdf_train, mdf_test, column_dict_list
    #where column_dict_list has one column_dict for the returned column
    #whose normalization_dict stores ordinal_dict, ordinal_overlap_replace, ordl_activations_dict
    '''
    
    suffixoverlap_results = {}
    
    ucct_column = column + '_ucct'
    
    #create new column for transformation
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, ucct_column, suffixoverlap_results)
    
    mdf_test[ucct_column] = mdf_test[column].copy()
    
    #convert column to category
    mdf_train[ucct_column] = mdf_train[ucct_column].astype('category')
    mdf_test[ucct_column] = mdf_test[ucct_column].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[ucct_column].cat.categories:
      mdf_train[ucct_column] = mdf_train[ucct_column].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[ucct_column].cat.categories:
      mdf_test[ucct_column] = mdf_test[ucct_column].cat.add_categories(['zzzinfill'])

    #replace NA with a dummy variable
    mdf_train[ucct_column] = mdf_train[ucct_column].fillna('zzzinfill')
    mdf_test[ucct_column] = mdf_test[ucct_column].fillna('zzzinfill')

    #replace numerical with string equivalent
    mdf_train[ucct_column] = mdf_train[ucct_column].astype(str)
    mdf_test[ucct_column] = mdf_test[ucct_column].astype(str)
    
    #count instances of each class in the train set
    #the count series name and index name are set explicitly since the names returned
    #by value_counts differ between pandas versions (changed in pandas 2.0),
    #which otherwise breaks the sort_values by column name
    class_counts = \
    mdf_train[ucct_column].value_counts().rename(ucct_column).rename_axis('zzzinfill')
    
    #extract categories for column labels
    #with values sorted by frequency of occurance from most to least
    #and ties sorted by label
    labels_train = pd.DataFrame(class_counts)
    labels_train = labels_train.sort_values(by = [ucct_column, 'zzzinfill'], ascending = [False, True])
    labels_train = list(labels_train.index)

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
    
    #note that no overlap check between labels and encodings is needed for this transform
    #since after the string conversion all labels are strings, while the encodings are floats,
    #so a label can never match an encoding in the replace operation
    #the empty overlap_replace is retained for the returned normalization_dict
    overlap_replace = {}
    
    #____
    
    #assemble the ordinal_dict
    #with key of class and value of normalized unique class count
    #(a class without train set entries, which can only be the inserted 'zzzinfill', gets 0)
    count_lookup = dict(zip(class_counts.index, class_counts.values))
    rowcount = mdf_train.shape[0]
    
    ordinal_dict = {}
    for item in labels_train:
      item_count = int(count_lookup.get(item, 0))
      ordinal_dict.update({item: item_count / rowcount})
    
    #there is an edge case for replace operation if dtype is other than object
    #(consistent with the comparable check in the ordinal encoding functions)
    if mdf_train[ucct_column].dtype.name != 'object':
      mdf_train[ucct_column] = mdf_train[ucct_column].astype('object')
    
    #replace the categories in train set via ordinal transformation
    mdf_train[ucct_column] = mdf_train[ucct_column].replace(ordinal_dict)
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let's identify what applies
    #(labels_train always includes 'zzzinfill' at this point so it is never struck)
    labels_test = list(mdf_test[ucct_column].unique())
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    if mdf_test[ucct_column].dtype.name != 'object':
      mdf_test[ucct_column] = mdf_test[ucct_column].astype('object')
    mdf_test[ucct_column] = mdf_test[ucct_column].replace(testplug_dict)
    
    #now we'll apply the ordinal transformation to the test set
    mdf_test[ucct_column] = mdf_test[ucct_column].replace(ordinal_dict)

    #new driftreport metric ordl_activations_dict
    #records for each class the ratio of train set rows with encoding equal to that class's encoding
    #note that classes with tied counts share an encoding, so their ratio covers all the tied classes
    ordl_activations_dict = {}
    for key in ordinal_dict:
      sumcalc = (mdf_train[ucct_column] == ordinal_dict[key]).sum() 
      ratio = sumcalc / mdf_train[ucct_column].shape[0]
      ordl_activations_dict.update({key:ratio})
    
    categorylist = [ucct_column]  
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'ordinal_dict' : ordinal_dict, \
                                  'ordinal_overlap_replace' : overlap_replace, \
                                  'ordl_activations_dict' : ordl_activations_dict}}
    
      column_dict = {tc : {'category' : 'ucct', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_1010_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_1010_class(mdf_train, mdf_test, column, category)
    #preprocess column with categories into binary encoded sets
    #corresponding to (sorted) categories of >2 values
    #adresses infill with new point which we arbitrarily set as 'zzzinfill'
    #intended to show up as last point in set alphabetically
    #for categories present in test set not present in train set uses this 'zzzinfill' category
    '''
    
    suffixoverlap_results = {}
    
    #adjinfill accepts True/False to change default infill from mean inputation to adjacent cell
    if 'adjinfill' in params:
      adjinfill = params['adjinfill']
    else:
      adjinfill = False
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    #create new column for trasnformation
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, column + '_1010', suffixoverlap_results)
    
    mdf_test[column + '_1010'] = mdf_test[column].copy()
    
    #convert column to category
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('category')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[column + '_1010'].cat.categories:
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[column + '_1010'].cat.categories:
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].cat.add_categories(['zzzinfill'])
      
    if adjinfill is True:
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].fillna(method='ffill')
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].fillna(method='ffill')
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].fillna(method='bfill')
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].fillna(method='bfill')

    #replace NA with a dummy variable
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].fillna('zzzinfill')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].fillna('zzzinfill')

    if str_convert is True:
      #replace numerical with string equivalent
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype(str)
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype(str)
    else:
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('object')
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    
    #extract categories for column labels
    #note that .unique() extracts the labels as a numpy array
    labels_train = list(mdf_train[column + '_1010'].unique())
#     labels_train.sort()
    labels_train = sorted(labels_train, key=str)
    labels_test = list(mdf_test[column + '_1010'].unique())
#     labels_test.sort()
    labels_test = sorted(labels_test, key=str)

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
      labels_train = sorted(labels_train, key=str)
#       labels_train.sort()
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
      labels_test = sorted(labels_test, key=str)
#       labels_test.sort()
    
    #get length of the list
    listlength = len(labels_train)
    
    #calculate number of columns we'll need
    #currently using numk;py since already imported, this could also be done with math library
    binary_column_count = int(np.ceil(np.log2(listlength)))
    
    #initialize dictionaryt to store encodings
    binary_encoding_dict = {}
    encoding_list = []
    
    for i in range(listlength):
      
      #this converts the integer i to binary encoding
      #where f is an f string for inserting the column coount into the string to designate length of encoding
      #0 is to pad out the encoding with 0's for the length
      #and b is telling it to convert to binary 
      #note this returns a string
      encoding = format(i, f"0{binary_column_count}b")
      
      if i < len(labels_train):

        #store the encoding in a dictionary
        binary_encoding_dict.update({labels_train[i] : encoding})

        #store the encoding in a list for checking in next step
        encoding_list.append(encoding)

    #____
    #quick check if there are any overlaps between binary encodings and prior unique values in the column
    #as would interfere with the replacement operation
    #(I know this is an outlier scenario, just trying to be thorough)
    
    overlap_list = []
    overlap_replace = {}
    for value in labels_train:
      if value in encoding_list:
        overlap_list.append(value)
        
        #since overlapreplace will add suffix to the category overlapped with a binary encoding
        #let's quickly check if that category plus suffix is already present in the set
        #if so we'll keep adding digits until a unique entry
        encoding_overlap_suffix = 'encoding_overlap'
        for i in range(111):
          j = random.randint(0,9)
          if value + encoding_overlap_suffix in labels_train:
            encoding_overlap_suffix += str(j)
          else:
            break
        
        #here's what we'll replace with, the string suffix is arbitrary and intended as not likely to be in set
        overlap_replace.update({value : value + encoding_overlap_suffix})

    #here we replace the overlaps with version with jibberish suffix
    if len(overlap_list) > 0:
      
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].replace(overlap_replace)
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(overlap_replace)
      
      #then we'll redo the encodings
      
      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = list(mdf_train[column + '_1010'].unique())
      labels_train = sorted(labels_train, key=str)
#       labels_train.sort()
      
      labels_test = list(mdf_test[column + '_1010'].unique())
      labels_test = sorted(labels_test, key=str)
#       labels_test.sort()

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
        labels_train = sorted(labels_train, key=str)
  #       labels_train.sort()
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
        labels_test = sorted(labels_test, key=str)
  #       labels_test.sort()
      
      #initialize dictionaryt to store encodings
      binary_encoding_dict = {}
      encoding_list = []

      for i in range(listlength):

        #this converts the integer i to binary encoding
        #where f is an f string for inserting the column coount into the string to designate length of encoding
        #0 is to pad out the encoding with 0's for the length
        #and b is telling it to convert to binary 
        #note this returns a string
        encoding = format(i, f"0{binary_column_count}b")
        
        if i < len(labels_train):

          #store the encoding in a dictionary
          binary_encoding_dict.update({labels_train[i] : encoding})

          #store the encoding in a list for checking in next step
          encoding_list.append(encoding)

    #clear up memory
    del encoding_list
    del overlap_list
    
    #new driftreport metric _1010_activations_dict
    _1010_activations_dict = {}
    for key in binary_encoding_dict:
      sumcalc = (mdf_train[column+'_1010'] == key).sum() 
      ratio = sumcalc / mdf_train[column+'_1010'].shape[0]
      _1010_activations_dict.update({key:ratio})
    
    #____
    
    #replace the cateogries in train set via ordinal trasnformation
    
    if mdf_train[column + '_1010'].dtype.name != 'object':
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('object')
    
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].replace(binary_encoding_dict)      
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let'/s identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    if mdf_test[column + '_1010'].dtype.name != 'object':
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(testplug_dict)    
    
    #now we'll apply the 1010 transformation to the test set
    if mdf_test[column + '_1010'].dtype.name != 'object':
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(binary_encoding_dict)    

    #ok let's create a list of columns to store each entry of the binary encoding
    _1010_columnlist = []
    
    for i in range(binary_column_count):
      
      _1010_columnlist.append(column + '_1010_' + str(i))
      
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)
      
    #now let's store the encoding
    i=0
    for _1010_column in _1010_columnlist:
      
      mdf_train[_1010_column] = mdf_train[column + '_1010'].str.slice(i,i+1).astype(np.int8)
      
      mdf_test[_1010_column] = mdf_test[column + '_1010'].str.slice(i,i+1).astype(np.int8)
      
      i+=1
  
    #now delete the support column
    del mdf_train[column + '_1010']
    del mdf_test[column + '_1010']
    
    #now store the column_dict entries
    
    categorylist = _1010_columnlist
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'_1010_binary_encoding_dict' : binary_encoding_dict, \
                                  '_1010_overlap_replace' : overlap_replace, \
                                  '_1010_binary_column_count' : binary_column_count, \
                                  '_1010_activations_dict' : _1010_activations_dict, \
                                  'adjinfill' : adjinfill, \
                                  'str_convert' : str_convert}}
    
      column_dict = {tc : {'category' : '1010', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_bshr_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform, i.e. applied to one dataframe at a time
    #parses the source column as datetime and derives a new column with suffix '_bshr'
    #holding 1 where the hour of day lies between start and end (both bounds inclusive)
    #and 0 otherwise, including for entries that could not be parsed as datetime
    #accepts parameter 'start' for the first hour of the window, defaults to 9
    #accepts parameter 'end' for the last hour of the window, defaults to 17
    #returns the dataframe and a list holding one column_dict for the new column
    '''
    
    bshr_column = column + '_bshr'
    
    #window bounds, falling back to defaults when not passed in params
    start = params['start'] if 'start' in params else 9
    end = params['end'] if 'end' in params else 17
    
    #run the suffix overlap check for the new column header
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, bshr_column, {})
    
    #entries that can't be parsed are coerced to NaT, 
    #for which the hour is NaN and the range test below evaluates False
    parsed_hours = pd.to_datetime(df[column], errors = 'coerce').dt.hour
    
    #between() is inclusive on both ends, so with defaults hours 9 through 17 activate
    #int8 is used to reduce memory footprint
    df[bshr_column] = parsed_hours.between(start, end).astype(np.int8)
    
    #driftreport metric: ratio of rows with an activation
    activations = df[bshr_column]
    activationratio = activations.sum() / activations.shape[0]
    
    normalization_dict = {bshr_column : {'activationratio' : activationratio}}
    
    #single returned column
    datecolumns = [bshr_column]
    
    #assemble the column_dict entry for each returned column
    column_dict_list = [ \
      { dc : {'category' : 'bshr', \
              'origcategory' : category, \
              'normalization_dict' : normalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : datecolumns, \
              'categorylist' : datecolumns, \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'suffixoverlap_results' : suffixoverlap_results, \
              'deletecolumn' : False}} \
      for dc in datecolumns]

    return df, column_dict_list

  def process_wkdy_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform, i.e. applied to one dataframe at a time
    #parses the source column as datetime and derives a new column with suffix '_wkdy'
    #holding 1 where the day of week is Monday through Friday (dayofweek 0-4)
    #and 0 otherwise, including for entries that could not be parsed as datetime
    #returns the dataframe and a list holding one column_dict for the new column
    '''
    
    wkdy_column = column + '_wkdy'
    
    #run the suffix overlap check for the new column header
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, wkdy_column, {})
    
    #entries that can't be parsed are coerced to NaT
    parsed = pd.to_datetime(df[column], errors = 'coerce')
    
    #dayofweek runs from 0 for Monday to 6 for Sunday
    df[wkdy_column] = pd.DatetimeIndex(parsed).dayofweek
    
    #inclusive range test for Monday-Friday, NaN from NaT entries evaluates False
    #int8 is used to reduce memory footprint
    df[wkdy_column] = df[wkdy_column].between(0,4).astype(np.int8)
    
    #driftreport metric: ratio of rows with an activation
    activations = df[wkdy_column]
    activationratio = activations.sum() / activations.shape[0]
    
    normalization_dict = {wkdy_column : {'activationratio' : activationratio}}
    
    #single returned column
    datecolumns = [wkdy_column]
    
    #assemble the column_dict entry for each returned column
    column_dict_list = [ \
      { dc : {'category' : 'wkdy', \
              'origcategory' : category, \
              'normalization_dict' : normalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : datecolumns, \
              'categorylist' : datecolumns, \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'deletecolumn' : False, \
              'suffixoverlap_results' : suffixoverlap_results}} \
      for dc in datecolumns]

    return df, column_dict_list

  def process_hldy_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform, i.e. applied to one dataframe at a time
    #parses the source column as datetime and derives a new column with suffix '_hldy'
    #holding 1 for rows whose calendar date is either a US Federal Holiday
    #(per pandas USFederalHolidayCalendar) or one of the dates passed in holiday_list
    #and 0 otherwise, including for entries that could not be parsed as datetime
    #accepts parameter 'holiday_list' as a list of additional holiday dates, defaults to []
    #entries of holiday_list that can't be parsed as dates are ignored
    #returns the dataframe and a list holding one column_dict for the new column
    '''
    
    hldy_column = column + '_hldy'
    
    #initialize parameters
    if 'holiday_list' in params:
      holiday_list = params['holiday_list']
    else:
      holiday_list = []
    
    if len(holiday_list) > 0:
      
      #convert user passed holidays to timestamps
      #(the DataFrame column 0 access is retained so the same input forms are accepted)
      parsed_holidays = pd.to_datetime(pd.DataFrame(holiday_list)[0], errors = 'coerce')
      
      #unparseable entries were coerced to NaT, these need to be dropped
      #since otherwise the isin test below would match every NaT row of the data
      #and mark unparseable / missing dates as holidays
      timestamp_list = parsed_holidays.dropna().tolist()
      
    else:
      timestamp_list = []
      
    #run the suffix overlap check for the new column header
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, hldy_column, {})
    
    #entries that can't be parsed are coerced to NaT
    df[hldy_column] = pd.to_datetime(df[column], errors = 'coerce')
    
    #strip the time of day by round trip through date objects
    #so that comparison to the holiday timestamps is by calendar date
    df[hldy_column] = df[hldy_column].dt.date
    df[hldy_column] = pd.to_datetime(df[hldy_column], errors = 'coerce')
    
    #federal holidays from the pandas calendar plus any user passed dates
    holidays = USFederalHolidayCalendar().holidays().tolist() + timestamp_list
    
    #activate boolean identifier for holidays, int8 to reduce memory footprint
    df[hldy_column] = df[hldy_column].isin(holidays).astype(np.int8)
    
    #create list of columns
    datecolumns = [hldy_column]

    #driftreport metric: ratio of rows with an activation
    activationratio = df[hldy_column].sum() / df[hldy_column].shape[0]

    #create normalization dictionary
    normalization_dict = {hldy_column : {'activationratio' : activationratio}}

    #assemble the column_dict entry for each returned column
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'hldy', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list
  
  def process_wkds_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform, i.e. applied to one dataframe at a time
    #parses the source column as datetime and derives a new column with suffix '_wkds'
    #holding the day of week as an integer, 0 for Monday through 6 for Sunday
    #default infill for entries that can't be parsed as datetime is 7
    #(eight days a week)
    #driftreport metrics record the ratio of rows for each of the eight values,
    #which are returned as NaN when the dataframe has no rows
    #returns the dataframe and a list holding one column_dict for the new column
    '''
    
    wkds_column = column + '_wkds'
    
    #run the suffix overlap check for the new column header
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, wkds_column, {})
    
    #entries that can't be parsed are coerced to NaT
    df[wkds_column] = pd.to_datetime(df[column], errors = 'coerce')
    
    #dayofweek runs from 0 for Monday to 6 for Sunday, NaN for NaT entries
    df[wkds_column] = pd.DatetimeIndex(df[wkds_column]).dayofweek
    
    #we'll use convention for default infill of eight days a week
    df[wkds_column] = df[wkds_column].fillna(7)
    
    #convert to integers (the column is float when any infill was needed)
    df[wkds_column] = df[wkds_column].astype(int)
    
    #create list of columns
    datecolumns = [wkds_column]

    #grab some driftreport metrics
    #counts are taken on the single column instead of filtering the full dataframe
    #the order of keys corresponds to the integer encodings 0 through 7
    ratio_keys = ['mon_ratio', 'tue_ratio', 'wed_ratio', 'thr_ratio', \
                  'fri_ratio', 'sat_ratio', 'sun_ratio', 'infill_ratio']
    
    day_codes = df[wkds_column]
    numberofrows = day_codes.shape[0]
    
    ratios = {}
    for code, ratio_key in enumerate(ratio_keys):
      if numberofrows > 0:
        ratios[ratio_key] = int((day_codes == code).sum()) / numberofrows
      else:
        #no rows to evaluate, avoid the division by zero
        ratios[ratio_key] = float('nan')
  
    #create normalization dictionary
    normalization_dict = {wkds_column : ratios}

    #assemble the column_dict entry for each returned column
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'wkds', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list
  
  def process_mnts_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform, i.e. applied to one dataframe at a time
    #parses the source column as datetime and derives a new column with suffix '_mnts'
    #holding the month as an integer, 1 for January through 12 for December
    #default infill for entries that can't be parsed as datetime is 0
    #driftreport metrics record the ratio of rows for each of the thirteen values,
    #which are returned as NaN when the dataframe has no rows
    #returns the dataframe and a list holding one column_dict for the new column
    '''
    
    mnts_column = column + '_mnts'
    
    #run the suffix overlap check for the new column header
    suffixoverlap_results = \
    self.df_check_suffixoverlap(df, mnts_column, {})
    
    #entries that can't be parsed are coerced to NaT
    df[mnts_column] = pd.to_datetime(df[column], errors = 'coerce')
    
    #month runs from 1 for January to 12 for December, NaN for NaT entries
    df[mnts_column] = pd.DatetimeIndex(df[mnts_column]).month
    
    #jan-dec is 1-12, 0 is default infill
    df[mnts_column] = df[mnts_column].fillna(0)
    
    #reduce memory footprint
    df[mnts_column] = df[mnts_column].astype(np.int8)
    
    #create list of columns
    datecolumns = [mnts_column]

    #grab some driftreport metrics
    #counts are taken on the single column instead of filtering the full dataframe
    #the order of keys corresponds to the integer encodings 0 through 12
    ratio_keys = ['infill_ratio', \
                  'jan_ratio', 'feb_ratio', 'mar_ratio', 'apr_ratio', \
                  'may_ratio', 'jun_ratio', 'jul_ratio', 'aug_ratio', \
                  'sep_ratio', 'oct_ratio', 'nov_ratio', 'dec_ratio']
    
    month_codes = df[mnts_column]
    numberofrows = month_codes.shape[0]
    
    ratios = {}
    for code, ratio_key in enumerate(ratio_keys):
      if numberofrows > 0:
        ratios[ratio_key] = int((month_codes == code).sum()) / numberofrows
      else:
        #no rows to evaluate, avoid the division by zero
        ratios[ratio_key] = float('nan')
  
    #create normalization dictionary
    normalization_dict = {mnts_column : ratios}

    #assemble the column_dict entry for each returned column
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'mnts', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def process_tmsc_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #time data segregated by time scale
    #with sin or cos applied to address periodicity
    #such as may be useful to return both by seperate transformation categories
    #accepts parameter 'scale' to distinguish between year/month/day/hour/minute/second
    #note that some scales can be returned combined by passing 
    #monthday/dayhourminute/hourminutesecond/minutesecond
    #(defaults to 'monthday')
    #accepts parameter 'suffix' for returned column header suffix (defaults to '_mdsn')
    #accets parameter 'function' to distinguish between sin/cos (defaults to 'sin')
    #
    #the periods applied to each scale are:
    #year: 10, month / monthday: 12, day / dayhourminute: 7 (applied to day of month),
    #hour / hourminutesecond: 24, minute / minutesecond: 60, second: 60
    #
    #entries that can't be parsed as datetime receive infill from the adjacent cell
    #(forward fill then backward fill), with 0 as backup when there are no valid entries
    #drift metrics mean/max/min/std are evaluated on the train set 
    #after scaling to radians and prior to infill and the trigonometric transform
    #
    #returns the train dataframe, test dataframe, and a list holding one column_dict
    """
    
    #initialize parameters
    scale = params['scale'] if 'scale' in params else 'monthday'
    suffix = params['suffix'] if 'suffix' in params else '_mdsn'
    function = params['function'] if 'function' in params else 'sin'
    
    time_column = column + suffix
    
    def _scaled_angle(timestamps):
      #converts a datetime series to radians based on the scale parameter
      #as (value / period) * 2 pi, NaT entries are returned as NaN
      #an unrecognized scale returns the datetime series unaltered
      parts = timestamps.dt
      
      if scale == 'year':
        #periodicity by decade
        value, period = parts.year, 10
      elif scale == 'month':
        #periodicity by year
        value, period = parts.month, 12
      elif scale == 'day':
        #periodicity by week, note value is the day of month
        value, period = parts.day, 7
      elif scale == 'hour':
        #periodicity by day
        value, period = parts.hour, 24
      elif scale == 'minute':
        #periodicity by hour
        value, period = parts.minute, 60
      elif scale == 'second':
        #periodicity by minute
        value, period = parts.second, 60
      elif scale == 'monthday':
        #combine month and fraction of month elapsed, periodicity by year
        #days_in_month accounts for month length including leap year february
        #so no support columns need to be written to the dataframes
        value, period = parts.month + parts.day / parts.days_in_month, 12
      elif scale == 'dayhourminute':
        #periodicity by week, note day is the day of month
        value, period = parts.day + parts.hour / 24 + parts.minute / 24 / 60, 7
      elif scale == 'hourminutesecond':
        #periodicity by day
        value, period = parts.hour + parts.minute / 60 + parts.second / 60 / 60, 24
      elif scale == 'minutesecond':
        #periodicity by hour
        value, period = parts.minute + parts.second / 60, 60
      else:
        return timestamps
      
      return value * 2 * np.pi / period
    
    #copy source column into new column in train set, with suffix overlap results returned
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, time_column, {})
    
    mdf_test[time_column] = mdf_test[column].copy()
    
    #apply pd.to_datetime to column, note that the errors = 'coerce' needed for messy data
    #and then scale to radians
    mdf_train[time_column] = \
    _scaled_angle(pd.to_datetime(mdf_train[time_column], errors = 'coerce'))
    mdf_test[time_column] = \
    _scaled_angle(pd.to_datetime(mdf_test[time_column], errors = 'coerce'))
      
    #grab a few drift metrics, we'll evaluate prior to trigometric transform
    timemean = mdf_train[time_column].mean()
    timemax = mdf_train[time_column].max()
    timemin = mdf_train[time_column].min()
    timestd = mdf_train[time_column].std()
    
    #default infill is adjacent cell
    #(ffill / bfill methods are used since fillna(method=...) is deprecated in pandas)
    #with backup default infill of 0 for cases without valid entries
    mdf_train[time_column] = mdf_train[time_column].ffill().bfill().fillna(0)
    mdf_test[time_column] = mdf_test[time_column].ffill().bfill().fillna(0)
    
    #apply trigometric transform
    
    if function == 'sin':
      
      mdf_train[time_column] = np.sin(mdf_train[time_column])
      mdf_test[time_column] = np.sin(mdf_test[time_column])
      
    if function == 'cos':

      mdf_train[time_column] = np.cos(mdf_train[time_column])
      mdf_test[time_column] = np.cos(mdf_test[time_column])
      
    #populate data structures
    column_dict_list = []
    categorylist = [time_column]
    
    for tc in categorylist:
      norm_dict = {tc : {'scale'         : scale, \
                         'suffix'        : suffix, \
                         'function'      : function, \
                         'timemean'      : timemean, \
                         'timemax'       : timemax, \
                         'timemin'       : timemin, \
                         'timestd'       : timestd}}
      
      column_dict = {tc : {'category' : 'tmsc', \
                           'origcategory' : category, \
                           'normalization_dict' : norm_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_time_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #extracts a single time scale from a datetime column and normalizes it
    #the result is returned in a new column with header column + suffix
    #
    #mdf_train / mdf_test are the train and test dataframes
    #column is the header of the source column
    #category is the root category, recorded as 'origcategory' in the column_dict
    #postprocess_dict is not referenced by this function
    #
    #accepts parameter 'scale' to distinguish between year/month/day/hour/minute/second
    #(defaults to 'year'), for any other value no extraction is applied to the
    #parsed datetimes
    #accepts parameter 'suffix' for returned column header suffix (defaults to '_year')
    #accepts parameter 'normalization' to distinguish between zscore/minmax/unscaled
    #(defaults to 'zscore')
    #
    #entries that can't be parsed as a datetime receive infill from an adjacent cell,
    #or 0 when a column has no valid entries
    #the normalization parameters are derived from the train data and applied
    #to both the train and test data
    #
    #returns mdf_train, mdf_test, column_dict_list
    #raises ValueError when 'normalization' is not one of zscore/minmax/unscaled
    """
    
    suffixoverlap_results = {}
    
    #params is only read from, so the shared default dictionary is never modified
    scale = params.get('scale', 'year')
    suffix = params.get('suffix', '_year')
    normalization = params.get('normalization', 'zscore')
    
    #validated before any column is added, an unsupported value would otherwise
    #only surface further below as an unassigned scaler / divisor
    if normalization not in ('zscore', 'minmax', 'unscaled'):
      raise ValueError("process_time_class parameter 'normalization' must be one of " \
                       + "'zscore', 'minmax', 'unscaled', received: " + str(normalization))
    
    time_column = column + suffix
    
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, time_column, suffixoverlap_results)
    
    mdf_test[time_column] = mdf_test[column].copy()
    
    #each supported scale is the name of an attribute of the pandas .dt accessor
    supported_scales = ('year', 'month', 'day', 'hour', 'minute', 'second')
    
    for df in (mdf_train, mdf_test):
      
      #apply pd.to_datetime to column, note that the errors = 'coerce' needed for messy data
      df[time_column] = pd.to_datetime(df[time_column], errors = 'coerce')
      
      #access time scale from one of year/month/day/hour/minute/second
      if scale in supported_scales:
        df[time_column] = getattr(df[time_column].dt, scale)
      
      #default infill is adjacent cell, preceding entry first and then following entry
      #(ffill / bfill are the non-deprecated equivalents of fillna(method=...))
      df[time_column] = df[time_column].ffill().bfill()
      
    #grab a few drift metrics, these are based on the train data only
    timemean = mdf_train[time_column].mean()
    timemax = mdf_train[time_column].max()
    timemin = mdf_train[time_column].min()
    timestd = mdf_train[time_column].std()

    maxminusmin = timemax - timemin
    
    #backup default infill for cases without valid entries
    mdf_train[time_column] = mdf_train[time_column].fillna(0)
    mdf_test[time_column] = mdf_test[time_column].fillna(0)
      
    #formula for scaling is (x - scaler) / divisor
    #normalization is either zscore/minmax/unscaled
    if normalization == 'zscore':
      scaler = timemean
      divisor = timestd
    elif normalization == 'minmax':
      scaler = timemin
      divisor = maxminusmin
    else:
      scaler = 0
      divisor = 1
    
    #avoid division by zero or by NaN (NaN is the only value unequal to itself)
    if divisor == 0 or divisor != divisor:
      divisor = 1
      
    if scaler != scaler:
      scaler = 0
      
    #apply normalization
    if normalization != 'unscaled':
      mdf_train[time_column] = (mdf_train[time_column] - scaler) / divisor
      mdf_test[time_column] = (mdf_test[time_column] - scaler) / divisor
      
    #populate data structures
    column_dict_list = []
    categorylist = [time_column]
    
    for tc in categorylist:
      norm_dict = {tc : {'scale'         : scale, \
                         'suffix'        : suffix, \
                         'normalization' : normalization, \
                         'scaler'        : scaler, \
                         'divisor'       : divisor, \
                         'timemean'      : timemean, \
                         'timemax'       : timemax, \
                         'timemin'       : timemin, \
                         'timestd'       : timestd, \
                         'maxminusmin'   : maxminusmin}}
      
      column_dict = {tc : {'category' : 'time', \
                           'origcategory' : category, \
                           'normalization_dict' : norm_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_bxcx_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #Box-Cox transform of column, returned in new column with header column + '_bxcx'
    #the transform itself is delegated to process_bxcx_support, which is called
    #once for the train data to derive the lambda and infill mean, and once
    #for the test data with those derived values passed along
    #postprocess_dict and params are not referenced by this function
    #returns mdf_train, mdf_test, and the column_dict_list from the train data call
    '''
    
    bxcx_header = column + '_bxcx'
    
    #train data, the None values signal that lambda and mean are to be derived
    mdf_train, column_dict_list = \
    self.process_bxcx_support(mdf_train, column, category, 1, \
                              bxcx_lmbda = None, trnsfrm_mean = None)

    #look up the normalization parameters recorded for the returned column
    for entry in column_dict_list:
      if bxcx_header in entry:
        train_parameters = entry[bxcx_header]['normalization_dict'][bxcx_header]

    train_lmbda = train_parameters['bxcx_lmbda']
    train_mean = train_parameters['trnsfrm_mean']

    #test data, the column_dict_list of this call is not needed
    mdf_test, _ = \
    self.process_bxcx_support(mdf_test, column, category, 1, \
                              bxcx_lmbda = train_lmbda, trnsfrm_mean = train_mean)

    return mdf_train, mdf_test, column_dict_list

  def process_bxcx_support(self, df, column, category, bxcxerrorcorrect, bxcx_lmbda = None, trnsfrm_mean = None):
    '''                      
    #applies a Box-Cox transform to a numerical column of dataframe df,
    #returned in a new column with header column + '_bxcx'
    #
    #df is the dataframe to transform (may be train or test data)
    #column is the header of the source column
    #category is the root category, recorded as 'origcategory' in the column_dict
    #bxcxerrorcorrect is a multiplier applied to the transformed values
    #bxcx_lmbda is one of
    #  None  : lambda is inferred from this data with SciPy's stats.boxcox call
    #  False : the column is returned as all zeros
    #  otherwise the received value is applied as the lambda of the transform
    #trnsfrm_mean is the infill value, when None the mean of the positive numeric
    #entries of this data is used
    #
    #non-numeric and non-positive entries are treated as missing and receive the infill
    #
    #returns transformed dataframe and a column_dict_list whose normalization_dict
    #records trnsfrm_mean, bxcx_lmbda, bxcxerrorcorrect, and mean
    '''
    
    suffixoverlap_results = {}
    
    bxcxcolumn = column + '_bxcx'

    df, suffixoverlap_results = \
    self.df_copy_train(df, column, bxcxcolumn, suffixoverlap_results)

    #convert all values to either numeric or NaN
    df[bxcxcolumn] = pd.to_numeric(df[bxcxcolumn], errors='coerce')
    #convert non-positive values to nan
    df.loc[df[bxcxcolumn] <= 0, bxcxcolumn] = np.nan

    #get the mean value to apply to infill
    if trnsfrm_mean is None:
      #get mean of this data
      mean = df[bxcxcolumn].mean()
    else:
      mean = trnsfrm_mean

    #edge case, no valid entries to base the infill on (NaN is unequal to itself)
    if mean != mean or mean <= 0:
      mean = 0
      bxcx_lmbda = False

    #replace missing data with the mean
    df[bxcxcolumn] = df[bxcxcolumn].fillna(mean)
    
    if bxcx_lmbda is None:
      
      #lambda is to be inferred from this data
      
      if df[bxcxcolumn].nunique() == 1:
        
        #edge case to avoid stats.boxcox error, which can't infer a lambda from constant data
        #this guard is only needed when inferring, constant data passed with an
        #already derived lambda (such as a single row of test data) is transformed below
        df[bxcxcolumn] = 0
        
        #we'll use convention that if training data is set to 0 then so will all subsequent data
        bxcx_lmbda = False
        
      else:
        
        #note that without a lmbda argument the call returns both the
        #transformed data and the inferred lambda
        df[bxcxcolumn], bxcx_lmbda = stats.boxcox(df[bxcxcolumn])
        df[bxcxcolumn] *= bxcxerrorcorrect
        
    elif bxcx_lmbda is False:
      
      df[bxcxcolumn] = 0
      
    else:
      
      #apply the received lambda
      df[bxcxcolumn] = stats.boxcox(df[bxcxcolumn], lmbda = bxcx_lmbda)
      df[bxcxcolumn] *= bxcxerrorcorrect

    #this is to address an error when bxcx transform produces overflow
    #the pandas max is used as it also accepts a column without any rows
    bxcxerrorcorrect = 1
    if df[bxcxcolumn].max() > (2 ** 31 - 1):
      bxcxerrorcorrect = 0
      df[bxcxcolumn] = 0
      print("overflow condition found in boxcox transofrm, column set to 0: ", bxcxcolumn)

    #output of a list of the created column names
    nmbrcolumns = [bxcxcolumn]

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      #save a dictionary of the associated infill mean and lambda
      normalization_dict = {nc : {'trnsfrm_mean' : mean, \
                                  'bxcx_lmbda' : bxcx_lmbda, \
                                  'bxcxerrorcorrect' : bxcxerrorcorrect, \
                                  'mean' : mean}}

      column_dict = { nc : {'category' : 'bxcx', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def process_log0_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_log0_class(mdf_train, mdf_test, column, category)
    #logarithmic transform (base 10)
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #zero, negative, missing, and non-numeric entries receive as infill the mean
    #of the log transformed train data (or 0 when there is no such mean)
    #returns the dataframes with the transformed column under header
    #column + '_log0', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    log0column = column + '_log0'
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, log0column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : log0column}, inplace = True)
      mdf_test.rename(columns = {column : log0column}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, log0column, suffixoverlap_results)

      mdf_test[log0column] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[log0column] = pd.to_numeric(df[log0column], errors='coerce')
    
    #zero and negative entries are also set to NaN ahead of the log operation
    for df in both_sets:
      df.loc[df[log0column] <= 0, log0column] = np.nan
    
    #base 10 logarithm, NaN entries carry through and are infilled below
    for df in both_sets:
      df[log0column] = np.log10(df[log0column])
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    meanlog = mdf_train[log0column].mean()
    
    if meanlog != meanlog:
      meanlog = 0

    for df in both_sets:
      df[log0column] = df[log0column].fillna(meanlog)

    #assemble the returned data structures
    nmbrcolumns = [log0column]

    nmbrnormalization_dict = {log0column : {'meanlog' : meanlog}}

    column_dict_list = [
      {nc : {'category' : 'log0',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_logn_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_logn_class(mdf_train, mdf_test, column, category)
    #natural logarithmic transform (base e)
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #zero, negative, missing, and non-numeric entries receive as infill the mean
    #of the log transformed train data (or 0 when there is no such mean)
    #returns the dataframes with the transformed column under header
    #column + '_logn', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    logncolumn = column + '_logn'
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, logncolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : logncolumn}, inplace = True)
      mdf_test.rename(columns = {column : logncolumn}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, logncolumn, suffixoverlap_results)

      mdf_test[logncolumn] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[logncolumn] = pd.to_numeric(df[logncolumn], errors='coerce')
    
    #zero and negative entries are also set to NaN ahead of the log operation
    for df in both_sets:
      df.loc[df[logncolumn] <= 0, logncolumn] = np.nan
    
    #natural logarithm, NaN entries carry through and are infilled below
    for df in both_sets:
      df[logncolumn] = np.log(df[logncolumn])
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    meanlog = mdf_train[logncolumn].mean()
    
    if meanlog != meanlog:
      meanlog = 0

    for df in both_sets:
      df[logncolumn] = df[logncolumn].fillna(meanlog)

    #assemble the returned data structures
    nmbrcolumns = [logncolumn]

    nmbrnormalization_dict = {logncolumn : {'meanlog' : meanlog}}

    column_dict_list = [
      {nc : {'category' : 'logn',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_sqrt_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sqrt_class(mdf_train, mdf_test, column, category)
    #square root transform
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #negative, missing, and non-numeric entries receive as infill the mean
    #of the square root transformed train data (or 0 when there is no such mean)
    #note that zero entries are valid input and are returned as zero
    #returns the dataframes with the transformed column under header
    #column + '_sqrt', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    sqrtcolumn = column + '_sqrt'
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, sqrtcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : sqrtcolumn}, inplace = True)
      mdf_test.rename(columns = {column : sqrtcolumn}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, sqrtcolumn, suffixoverlap_results)

      mdf_test[sqrtcolumn] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[sqrtcolumn] = pd.to_numeric(df[sqrtcolumn], errors='coerce')
    
    #negative entries are also set to NaN ahead of the square root operation
    for df in both_sets:
      df.loc[df[sqrtcolumn] < 0, sqrtcolumn] = np.nan
    
    #square root, NaN entries carry through and are infilled below
    for df in both_sets:
      df[sqrtcolumn] = np.sqrt(df[sqrtcolumn])
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    meansqrt = mdf_train[sqrtcolumn].mean()
    
    if meansqrt != meansqrt:
      meansqrt = 0

    for df in both_sets:
      df[sqrtcolumn] = df[sqrtcolumn].fillna(meansqrt)

    #assemble the returned data structures
    nmbrcolumns = [sqrtcolumn]

    nmbrnormalization_dict = {sqrtcolumn : {'meansqrt' : meansqrt}}

    column_dict_list = [
      {nc : {'category' : 'sqrt',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_addd_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_addd_class(.)
    #addition transform
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'add' for amount of addition, otherwise defaults to adding 1
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #missing and non-numeric entries receive as infill the mean of the
    #train data after addition (or 0 when there is no such mean)
    #returns the dataframes with the transformed column under header
    #column + '_addd', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    adddcolumn = column + '_addd'
    
    inplace = params.get('inplace', False)
    add = params.get('add', 1)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, adddcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : adddcolumn}, inplace = True)
      mdf_test.rename(columns = {column : adddcolumn}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, adddcolumn, suffixoverlap_results)

      mdf_test[adddcolumn] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[adddcolumn] = pd.to_numeric(df[adddcolumn], errors='coerce')
    
    #addition, NaN entries carry through and are infilled below
    for df in both_sets:
      df[adddcolumn] = df[adddcolumn] + add
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    mean = mdf_train[adddcolumn].mean()
    
    if mean != mean:
      mean = 0

    for df in both_sets:
      df[adddcolumn] = df[adddcolumn].fillna(mean)

    #assemble the returned data structures
    nmbrcolumns = [adddcolumn]

    nmbrnormalization_dict = {adddcolumn : {'mean' : mean,
                                            'add' : add}}

    column_dict_list = [
      {nc : {'category' : 'addd',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_sbtr_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbtr_class(.)
    #subtraction transform
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'subtract' for amount of subtraction, otherwise defaults to subtracting 1
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #missing and non-numeric entries receive as infill the mean of the
    #train data after subtraction (or 0 when there is no such mean)
    #returns the dataframes with the transformed column under header
    #column + '_sbtr', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    sbtrcolumn = column + '_sbtr'
    
    inplace = params.get('inplace', False)
    subtract = params.get('subtract', 1)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, sbtrcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : sbtrcolumn}, inplace = True)
      mdf_test.rename(columns = {column : sbtrcolumn}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, sbtrcolumn, suffixoverlap_results)

      mdf_test[sbtrcolumn] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[sbtrcolumn] = pd.to_numeric(df[sbtrcolumn], errors='coerce')
    
    #subtraction, NaN entries carry through and are infilled below
    for df in both_sets:
      df[sbtrcolumn] = df[sbtrcolumn] - subtract
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    mean = mdf_train[sbtrcolumn].mean()
    
    if mean != mean:
      mean = 0

    for df in both_sets:
      df[sbtrcolumn] = df[sbtrcolumn].fillna(mean)

    #assemble the returned data structures
    nmbrcolumns = [sbtrcolumn]

    nmbrnormalization_dict = {sbtrcolumn : {'mean' : mean,
                                            'subtract' : subtract}}

    column_dict_list = [
      {nc : {'category' : 'sbtr',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_mltp_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mltp_class(.)
    #multiplication transform
    #accepts pandas dataframes of train and test data (mdf_train), (mdf_test),
    #the header of the source column (column) and the parent category (category)
    #accepts parameter 'multiply' for the multiplier, otherwise defaults to multiplying by 2
    #accepts parameter 'inplace', when True the source column is renamed to the
    #new header and transformed in place instead of being copied
    #missing and non-numeric entries receive as infill the mean of the
    #train data after multiplication (or 0 when there is no such mean)
    #returns the dataframes with the transformed column under header
    #column + '_mltp', along with the associated column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    mltpcolumn = column + '_mltp'
    
    inplace = params.get('inplace', False)
    multiply = params.get('multiply', 2)
    
    if inplace is True:
      
      #the source column takes on the new header, first checking the header for overlap
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, mltpcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : mltpcolumn}, inplace = True)
      mdf_test.rename(columns = {column : mltpcolumn}, inplace = True)
      
    else:
      
      #the source column is retained and the transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, mltpcolumn, suffixoverlap_results)

      mdf_test[mltpcolumn] = mdf_test[column].copy()
    
    both_sets = (mdf_train, mdf_test)

    #entries that can't be read as a number become NaN
    for df in both_sets:
      df[mltpcolumn] = pd.to_numeric(df[mltpcolumn], errors='coerce')
    
    #multiplication, NaN entries carry through and are infilled below
    for df in both_sets:
      df[mltpcolumn] = df[mltpcolumn] * multiply
    
    #infill value is the train set mean, with 0 standing in for a NaN mean
    #(NaN is the only value unequal to itself)
    mean = mdf_train[mltpcolumn].mean()
    
    if mean != mean:
      mean = 0

    for df in both_sets:
      df[mltpcolumn] = df[mltpcolumn].fillna(mean)

    #assemble the returned data structures
    nmbrcolumns = [mltpcolumn]

    nmbrnormalization_dict = {mltpcolumn : {'mean' : mean,
                                            'multiply' : multiply}}

    column_dict_list = [
      {nc : {'category' : 'mltp',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_divd_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_divd_class(.)
    #division transform applied to a numeric column
    #params accepted:
    #  'divide'  - the divisor, defaults to 2 (a divisor of 0 is replaced with 1)
    #  'inplace' - defaults to False, when True the source column is renamed
    #              to the returned column instead of being copied
    #entries that can't be read as numeric are coerced to NaN, and after the
    #division NaN is replaced with the mean of the divided train column
    #(or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test with column + '_divd' populated
    #and a column_dict_list holding the column_dict for that column
    '''
    
    divdcolumn = column + '_divd'
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    divisor = params.get('divide', 2)
    
    #a zero divisor is overridden so the transform never divides by 0
    if divisor == 0:
      divisor = 1
    
    if inplace is True:
      
      #inplace scenario: check the new header then rename the source column
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, divdcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : divdcolumn}, inplace = True)
    
    else:
      
      #default scenario: the source column is retained and a copy is transformed
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, divdcolumn, suffixoverlap_results)
      
      mdf_test[divdcolumn] = mdf_test[column].copy()
    
    #coerce to numeric (non-numeric becomes NaN) and apply the division
    for df in (mdf_train, mdf_test):
      df[divdcolumn] = pd.to_numeric(df[divdcolumn], errors='coerce') / divisor
    
    #infill value is the train set mean after division, with 0 as fallback for NaN
    mean = mdf_train[divdcolumn].mean()
    if mean != mean:
      mean = 0
    
    #the train set mean is applied as infill to both sets
    for df in (mdf_train, mdf_test):
      df[divdcolumn] = df[divdcolumn].fillna(mean)
    
    #single returned column
    returnedcolumns = [divdcolumn]
    
    divdnormalization_dict = {divdcolumn : {'mean' : mean, \
                                            'divide' : divisor}}
    
    #one column_dict per returned column
    column_dict_list = [
      { returnedcolumn : {'category' : 'divd', \
                          'origcategory' : category, \
                          'normalization_dict' : divdnormalization_dict, \
                          'origcolumn' : column, \
                          'inputcolumn' : column, \
                          'columnslist' : returnedcolumns, \
                          'categorylist' : returnedcolumns, \
                          'infillmodel' : False, \
                          'infillcomplete' : False, \
                          'suffixoverlap_results' : suffixoverlap_results, \
                          'deletecolumn' : False}}
      for returnedcolumn in returnedcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_rais_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_rais_class(.)
    #raise to a power transform applied to a numeric column
    #params accepted:
    #  'raiser'  - the exponent, defaults to 2 (i.e. square)
    #  'inplace' - defaults to False, when True the source column is renamed
    #              to the returned column instead of being copied
    #entries that can't be read as numeric are coerced to NaN, and after the
    #raise NaN is replaced with the mean of the raised train column
    #(or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test with column + '_rais' populated
    #and a column_dict_list holding the column_dict for that column
    '''
    
    raiscolumn = column + '_rais'
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    exponent = params.get('raiser', 2)
    
    if inplace is True:
      
      #inplace scenario: check the new header then rename the source column
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, raiscolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : raiscolumn}, inplace = True)
    
    else:
      
      #default scenario: the source column is retained and a copy is transformed
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, raiscolumn, suffixoverlap_results)
      
      mdf_test[raiscolumn] = mdf_test[column].copy()
    
    #coerce to numeric (non-numeric becomes NaN) and raise to the power
    for df in (mdf_train, mdf_test):
      df[raiscolumn] = pd.to_numeric(df[raiscolumn], errors='coerce') ** exponent
    
    #infill value is the train set mean after the raise, with 0 as fallback for NaN
    mean = mdf_train[raiscolumn].mean()
    if mean != mean:
      mean = 0
    
    #the train set mean is applied as infill to both sets
    for df in (mdf_train, mdf_test):
      df[raiscolumn] = df[raiscolumn].fillna(mean)
    
    #single returned column
    returnedcolumns = [raiscolumn]
    
    raisnormalization_dict = {raiscolumn : {'mean' : mean, \
                                            'raiser' : exponent}}
    
    #one column_dict per returned column
    column_dict_list = [
      { returnedcolumn : {'category' : 'rais', \
                          'origcategory' : category, \
                          'normalization_dict' : raisnormalization_dict, \
                          'origcolumn' : column, \
                          'inputcolumn' : column, \
                          'columnslist' : returnedcolumns, \
                          'categorylist' : returnedcolumns, \
                          'infillmodel' : False, \
                          'infillcomplete' : False, \
                          'suffixoverlap_results' : suffixoverlap_results, \
                          'deletecolumn' : False}}
      for returnedcolumn in returnedcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_absl_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_absl_class(.)
    #absolute value transform applied to a numeric column
    #params accepted:
    #  'inplace' - defaults to False, when True the source column is renamed
    #              to the returned column instead of being copied
    #entries that can't be read as numeric are coerced to NaN, and after the
    #transform NaN is replaced with the mean of the transformed train column
    #(or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test with column + '_absl' populated
    #and a column_dict_list holding the column_dict for that column
    '''
    
    abslcolumn = column + '_absl'
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #inplace scenario: check the new header then rename the source column
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, abslcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : abslcolumn}, inplace = True)
    
    else:
      
      #default scenario: the source column is retained and a copy is transformed
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, abslcolumn, suffixoverlap_results)
      
      mdf_test[abslcolumn] = mdf_test[column].copy()
    
    #coerce to numeric (non-numeric becomes NaN) and take the absolute value
    for df in (mdf_train, mdf_test):
      df[abslcolumn] = pd.to_numeric(df[abslcolumn], errors='coerce').abs()
    
    #infill value is the train set mean after the transform, with 0 as fallback for NaN
    mean = mdf_train[abslcolumn].mean()
    if mean != mean:
      mean = 0
    
    #the train set mean is applied as infill to both sets
    for df in (mdf_train, mdf_test):
      df[abslcolumn] = df[abslcolumn].fillna(mean)
    
    #single returned column
    returnedcolumns = [abslcolumn]
    
    abslnormalization_dict = {abslcolumn : {'mean' : mean}}
    
    #one column_dict per returned column
    column_dict_list = [
      { returnedcolumn : {'category' : 'absl', \
                          'origcategory' : category, \
                          'normalization_dict' : abslnormalization_dict, \
                          'origcolumn' : column, \
                          'inputcolumn' : column, \
                          'columnslist' : returnedcolumns, \
                          'categorylist' : returnedcolumns, \
                          'infillmodel' : False, \
                          'infillcomplete' : False, \
                          'suffixoverlap_results' : suffixoverlap_results, \
                          'deletecolumn' : False}}
      for returnedcolumn in returnedcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list

  def process_pwrs_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_pwrs_class(.)
    #processes a numerical set by creating bins corresponding to powers
    #of ten in one hot encoded columns
    
    #pwrs will be intended for a raw set that is not yet normalized
    
    #positive values encoded under column 'column' + '_10^#' where # is power of 10
    #(# is the floor of log10 of the value)
    #0 and non-numeric values are considered infill with no activations
    #negative values are also treated as infill unless negvalues is activated,
    #in which case they are encoded under 'column' + '_-10^#' based on their absolute value
    
    #the set of returned columns is based on the powers found in the train set,
    #powers only found in the test set receive no activations
    
    #if all values are infill no columns returned
    
    #accepts boolean 'negvalues' parameter, defaults False, True activates encoding for values <0
    
    #returns mdf_train, mdf_test with the one hot encoded columns appended
    #(the source column is left in place)
    #and column_dict_list with one column_dict per returned column
    '''
    
    suffixoverlap_results = {}
    
    if 'negvalues' in params:
      negvalues = params['negvalues']
    else:
      negvalues = False
    
    #temporary support column for the positive values, deleted before return
    tempcolumn = column + '_-10^'

    #copy source column into the support column so the source column is left intact
    #(df_copy_train is defined elsewhere, presumably it also logs any header overlap
    #to suffixoverlap_results - confirm against that function)
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, tempcolumn, suffixoverlap_results)
    
    mdf_test[tempcolumn] = mdf_test[column].copy()

    #convert all values to either numeric or NaN
    mdf_train[tempcolumn] = pd.to_numeric(mdf_train[tempcolumn], errors='coerce')
    mdf_test[tempcolumn] = pd.to_numeric(mdf_test[tempcolumn], errors='coerce')
    
    #second temporary support column, this one for the negative values
    negtempcolumn = column + '_negtemp'
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, tempcolumn, negtempcolumn, suffixoverlap_results)
    
    mdf_test[negtempcolumn] = mdf_test[tempcolumn].copy()
    
    #convert all values in negtempcolumn >= 0 to Nan
    #(so negtempcolumn only retains the negative entries)
    mdf_train[negtempcolumn] = \
    np.where(mdf_train[negtempcolumn] >= 0, np.nan, mdf_train[negtempcolumn].values)
    mdf_test[negtempcolumn] = \
    np.where(mdf_test[negtempcolumn] >= 0, np.nan, mdf_test[negtempcolumn].values)
    
    #convert all values <= 0 to Nan
    #(so tempcolumn only retains the positive entries)
    mdf_train[tempcolumn] = \
    np.where(mdf_train[tempcolumn] <= 0, np.nan, mdf_train[tempcolumn].values)
    mdf_test[tempcolumn] = \
    np.where(mdf_test[tempcolumn] <= 0, np.nan, mdf_test[tempcolumn].values)
    
    #log transform the negative values
    
    #take abs value of negtempcolumn so that log10 is defined
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].abs()
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].abs()
    
    #replace each entry with its power of ten, i.e. floor(log10(entry))
    #note that a comparison != np.nan evaluates True for every entry (including NaN),
    #so the first branch is always the one selected, NaN entries still come out as NaN
    #since log10 of NaN is NaN
    mdf_train[negtempcolumn] = \
    np.where(mdf_train[negtempcolumn] != np.nan, np.floor(np.log10(mdf_train[negtempcolumn])), mdf_train[negtempcolumn].values)
    mdf_test[negtempcolumn] = \
    np.where(mdf_test[negtempcolumn] != np.nan, np.floor(np.log10(mdf_test[negtempcolumn])), mdf_test[negtempcolumn].values)
    
    #train_neg_dict maps each power found in train set negative values to the header
    #of the column it will activate, or to NaN when negvalues is not activated
    #newunique_list logs the headers derived from the train set
    train_neg_dict = {}
    newunique_list = []
    negunique = mdf_train[negtempcolumn].unique()
    for unique in negunique:
      #unique != unique is only True for NaN
      if unique != unique:
        newunique = np.nan
      else:
        #this is update for difference between pwr2 and pwrs
        if negvalues:
          newunique = column + '_-10^' + str(int(unique))
        else:
          newunique = np.nan
      train_neg_dict.update({unique : newunique})
      newunique_list.append(newunique)
      
    #test_neg_dict is comparable, except that a power is only mapped to a header
    #when that header was derived from the train set, otherwise mapped to NaN
    test_neg_dict = {}
    negunique = mdf_test[negtempcolumn].unique()
    for unique in negunique:
      if unique != unique:
        newunique = np.nan
      else:
        #this is update for difference between pwr2 and pwrs
        if negvalues:
          newunique = column + '_-10^' + str(int(unique))
        else:
          newunique = np.nan
      #newunique == newunique excludes the NaN case
      if newunique in newunique_list and newunique == newunique:
        test_neg_dict.update({unique : newunique})
      else:
        test_neg_dict.update({unique : np.nan})
        
    #replace the powers with the header strings (or NaN)
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].replace(train_neg_dict)
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].replace(test_neg_dict)
    
    #now log transform positive values in tempcolumn
    #(same note as above regarding the != np.nan comparison)

    mdf_train[tempcolumn] = \
    np.where(mdf_train[tempcolumn] != np.nan, np.floor(np.log10(mdf_train[tempcolumn])), mdf_train[tempcolumn].values)
    mdf_test[tempcolumn] = \
    np.where(mdf_test[tempcolumn] != np.nan, np.floor(np.log10(mdf_test[tempcolumn])), mdf_test[tempcolumn].values)

    #train_pos_dict maps each power found in train set positive values to the header
    #of the column it will activate, positive values are encoded regardless of negvalues
    train_pos_dict = {}
    newposunique_list = []
    posunique = mdf_train[tempcolumn].unique()
    for unique in posunique:
      if unique != unique:
        newunique = np.nan
      else:
        newunique = column + '_10^' + str(int(unique))
      train_pos_dict.update({unique : newunique})
      newposunique_list.append(newunique)
      
    #test_pos_dict only maps to headers that were derived from the train set
    test_pos_dict = {}
    posunique = mdf_test[tempcolumn].unique()
    for unique in posunique:
      if unique != unique:
        newunique = np.nan
      else:
        newunique = column + '_10^' + str(int(unique))
      if newunique in newposunique_list and newunique == newunique:
        test_pos_dict.update({unique : newunique})
      else:
        test_pos_dict.update({unique : np.nan})
    
    #replace the powers with the header strings (or NaN)
    mdf_train[tempcolumn] = mdf_train[tempcolumn].replace(train_pos_dict)
    mdf_test[tempcolumn] = mdf_test[tempcolumn].replace(test_pos_dict)
    
    #combine the two columns
    #rows with a non-NaN negative header take that header, other rows keep
    #the entry from the positive column
    mdf_train[tempcolumn] = mdf_train[negtempcolumn].where(mdf_train[negtempcolumn] == mdf_train[negtempcolumn], mdf_train[tempcolumn])
    mdf_test[tempcolumn] = mdf_test[negtempcolumn].where(mdf_test[negtempcolumn] == mdf_test[negtempcolumn], mdf_test[tempcolumn])
    
    #pandas one hot encoder
    #the header strings populated above become the headers of the encoded columns
    df_train_cat = pd.get_dummies(mdf_train[tempcolumn])
    df_test_cat = pd.get_dummies(mdf_test[tempcolumn])
    
    #note labels_train is reassigned below and labels_test is not referenced again
    #in this function
    labels_train = list(df_train_cat)
    labels_test = list(df_test_cat)

    #Get missing columns in test set that are present in training set
    missing_cols = set( df_train_cat.columns ) - set( df_test_cat.columns )
    
    #Add a missing column in test set with default value equal to 0
    for c in missing_cols:
        df_test_cat[c] = 0
    #Ensure the order of column in the test set is in the same order than in train set
    #Note this also removes categories in test set that aren't present in training set
    df_test_cat = df_test_cat[df_train_cat.columns]
    
    #check the new headers against those already in mdf_train before the concatenation
    #(df_check_suffixoverlap is defined elsewhere, presumably logs overlaps to
    #suffixoverlap_results - confirm against that function)
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)
    
    #concatenate the sparse set with the rest of our training data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)
    
    #delete the two temporary support columns, the source column is retained
    
    del mdf_train[negtempcolumn]    
    del mdf_test[negtempcolumn]
    
    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    #create output of a list of the created column names
#     NAcolumn = columnNAr2
    labels_train = list(df_train_cat)
#     if NAcolumn in labels_train:
#       labels_train.remove(NAcolumn)
    #note powercolumns is a second reference to the same list object as labels_train
    powercolumns = labels_train
  
    #change data type for memory savings
    for powercolumn in powercolumns:
      mdf_train[powercolumn] = mdf_train[powercolumn].astype(np.int8)
      mdf_test[powercolumn] = mdf_test[powercolumn].astype(np.int8)
    
    #both of these are again references to that same list, so the sorts below
    #sort the one list in place (which also sorts labels_train and powercolumns)
    #and powerlabelsdict ends up mapping each header to itself
    normalizationdictvalues = labels_train
    normalizationdictkeys = powercolumns
    
    normalizationdictkeys.sort()
    normalizationdictvalues.sort()
    
    powerlabelsdict = dict(zip(normalizationdictkeys, normalizationdictvalues))
    
    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []
    
    for pc in powercolumns:
      
      #new parameter collected for driftreport
      #tcratio is the ratio of train set rows with an activation in this column
      tc_ratio = pc + '_ratio'
      tcratio = mdf_train[pc].sum() / mdf_train[pc].shape[0]

      powernormalization_dict = {pc : {'powerlabelsdict_pwrs' : powerlabelsdict, \
                                       'labels_train' : labels_train, \
                                       'missing_cols' : missing_cols, \
                                       'negvalues' : negvalues, \
                                       tc_ratio : tcratio}}
    
      column_dict = {pc : {'category' : 'pwrs', \
                           'origcategory' : category, \
                           'normalization_dict' : powernormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : powercolumns, \
                           'categorylist' : powercolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
        
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_pwor_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_pwor_class(.)
    #processes a numerical set by creating bins corresponding to powers
    #of ten in an ordinal encoded column
    
    #pwor will be intended for a raw set that is not yet normalized
    
    #returned column is column + '_pwor'
    #infill has 0, other designations are based on the data
    #(integers are assigned to the powers found in the train set
    #based on their order in the unique() output)
    #0 and non-numeric values are treated as infill
    #powers only found in the test set are also encoded as 0
    
    #negative values based on negvalues parameter, makes comparable to por2
    #accepts boolean 'negvalues' parameter, defaults False, True activates
    #distinct encodings for values <0 based on power of their absolute value,
    #otherwise negative values are treated as infill
    
    #accepts boolean 'inplace' parameter, defaults False, True renames the source
    #column to the returned column instead of copying it
    
    #returns mdf_train, mdf_test and column_dict_list with the single column_dict
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'negvalues' in params:
      negvalues = params['negvalues']
    else:
      negvalues = False
    
    #header of the returned column
    pworcolumn = column + '_pwor'
    
    if inplace is not True:
      
      #copy source column into new column
      #(df_copy_train is defined elsewhere, presumably it also logs any header overlap
      #to suffixoverlap_results - confirm against that function)
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, pworcolumn, suffixoverlap_results)
      
      mdf_test[pworcolumn] = mdf_test[column].copy()
    
    else:
      
      #inplace scenario, check the new header then rename the source column
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, pworcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : pworcolumn}, inplace = True)
      mdf_test.rename(columns = {column : pworcolumn}, inplace = True)
    
    #convert all values to either numeric or NaN
    mdf_train[pworcolumn] = pd.to_numeric(mdf_train[pworcolumn], errors='coerce')
    mdf_test[pworcolumn] = pd.to_numeric(mdf_test[pworcolumn], errors='coerce')
    
    #copy set for negative values
    #negtempcolumn is a temporary support column, deleted before return
    negtempcolumn = column + '_negtempcolumn'
    
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, pworcolumn, negtempcolumn, suffixoverlap_results)
    
    mdf_test[negtempcolumn] = mdf_test[pworcolumn].copy()
    
    #convert all values >= 0 to Nan
    #(so negtempcolumn only retains the negative entries)
    mdf_train[negtempcolumn] = \
    np.where(mdf_train[negtempcolumn] >= 0, np.nan, mdf_train[negtempcolumn].values)
    mdf_test[negtempcolumn] = \
    np.where(mdf_test[negtempcolumn] >= 0, np.nan, mdf_test[negtempcolumn].values)
    
    #take abs value of negtempcolumn so that log10 is defined
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].abs()
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].abs()
    
    #convert all values <= 0 in column to Nan
    #(so pworcolumn only retains the positive entries)
    mdf_train[pworcolumn] = \
    np.where(mdf_train[pworcolumn] <= 0, np.nan, mdf_train[pworcolumn].values)
    mdf_test[pworcolumn] = \
    np.where(mdf_test[pworcolumn] <= 0, np.nan, mdf_test[pworcolumn].values)

    #replace each entry with its power of ten, i.e. floor(log10(entry))
    #note that a comparison != np.nan evaluates True for every entry (including NaN),
    #so the first branch is always the one selected, NaN entries still come out as NaN
    #since log10 of NaN is NaN
    mdf_train[pworcolumn] = \
    np.where(mdf_train[pworcolumn] != np.nan, np.floor(np.log10(mdf_train[pworcolumn])), mdf_train[pworcolumn].values)
    mdf_test[pworcolumn] = \
    np.where(mdf_test[pworcolumn] != np.nan, np.floor(np.log10(mdf_test[pworcolumn])), mdf_test[pworcolumn].values)
    
    #do same for negtempcolumn
    mdf_train[negtempcolumn] = \
    np.where(mdf_train[negtempcolumn] != np.nan, np.floor(np.log10(mdf_train[negtempcolumn])), mdf_train[negtempcolumn].values)
    mdf_test[negtempcolumn] = \
    np.where(mdf_test[negtempcolumn] != np.nan, np.floor(np.log10(mdf_test[negtempcolumn])), mdf_test[negtempcolumn].values)

    #train_neg_dict maps each power found in train set negative values to a string
    #label, or to NaN when negvalues is not activated
    #newunique_list logs the labels derived from the train set
    train_neg_dict = {}
    newunique_list = []
    negunique = mdf_train[negtempcolumn].unique()
    for unique in negunique:
      #unique != unique is only True for NaN
      if unique != unique:
        newunique = np.nan
      else:
        #this is update for difference between pwr2 and pwrs
        if negvalues:
          newunique = column + '_-10^' + str(int(unique))
        else:
          newunique = np.nan
      train_neg_dict.update({unique : newunique})
      newunique_list.append(newunique)
      
    #test_neg_dict is comparable, except that a power is only mapped to a label
    #when that label was derived from the train set, otherwise mapped to NaN
    test_neg_dict = {}
    negunique = mdf_test[negtempcolumn].unique()
    for unique in negunique:
      if unique != unique:
        newunique = np.nan
      else:
        #this is update for difference between pwr2 and pwrs
        if negvalues:
          newunique = column + '_-10^' + str(int(unique))
        else:
          newunique = np.nan
      #newunique == newunique excludes the NaN case
      if newunique in newunique_list and newunique == newunique:
        test_neg_dict.update({unique : newunique})
      else:
        test_neg_dict.update({unique : np.nan})
        
    #replace the powers with the string labels (or NaN)
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].replace(train_neg_dict)
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].replace(test_neg_dict)
    
    #now do same for column
    #positive values receive a label regardless of negvalues
    train_pos_dict = {}
    newposunique_list = []
    posunique = mdf_train[pworcolumn].unique()
    for unique in posunique:
      if unique != unique:
        newunique = np.nan
      else:
        newunique = column + '_10^' + str(int(unique))
      train_pos_dict.update({unique : newunique})
      newposunique_list.append(newunique)
      
    #test_pos_dict only maps to labels that were derived from the train set
    test_pos_dict = {}
    posunique = mdf_test[pworcolumn].unique()
    for unique in posunique:
      if unique != unique:
        newunique = np.nan
      else:
        newunique = column + '_10^' + str(int(unique))
      if newunique in newposunique_list and newunique == newunique:
        test_pos_dict.update({unique : newunique})
      else:
        test_pos_dict.update({unique : np.nan})
    
    #replace the powers with the string labels (or NaN)
    mdf_train[pworcolumn] = mdf_train[pworcolumn].replace(train_pos_dict)
    mdf_test[pworcolumn] = mdf_test[pworcolumn].replace(test_pos_dict)
    
    #combine the two columns
    #rows with a non-NaN negative label take that label, other rows keep
    #the entry from the positive column
    mdf_train[pworcolumn] = mdf_train[negtempcolumn].where(mdf_train[negtempcolumn] == mdf_train[negtempcolumn], mdf_train[pworcolumn])
    mdf_test[pworcolumn] = mdf_test[negtempcolumn].where(mdf_test[negtempcolumn] == mdf_test[negtempcolumn], mdf_test[pworcolumn])
    
    #the unique labels (possibly including NaN) found in each set
    train_unique = mdf_train[pworcolumn].unique()
    test_unique = mdf_test[pworcolumn].unique()
  
    #Get missing entries in test set that are present in training set
    #(note missing_cols and extra_cols are not referenced again in this function)
    missing_cols = set( list(train_unique) ) - set( list(test_unique) )
    
    extra_cols = set( list(test_unique) ) - set( list(train_unique) )
    
    #train_replace_dict maps each train set label to its ordinal integer encoding
    #NaN (infill) is assigned 0, other labels are assigned their position in
    #train_unique plus 1, so when NaN is among train_unique the integer
    #associated with its position is skipped
    train_replace_dict = {}
    train_len = len(train_unique)
    for i in range(train_len):
      if train_unique[i] != train_unique[i]:
        train_replace_dict.update({train_unique[i] : 0})
      else:
        train_replace_dict.update({train_unique[i] : i+1})
    #ensure there is a NaN key mapped to the infill encoding of 0
    if np.nan not in train_replace_dict:
      train_replace_dict.update({np.nan : 0})
      
    #test_replace_dict applies the train set encodings to labels found in the test set
    #anything not matched to train_unique (which includes NaN) is assigned infill 0
    test_replace_dict = {}
    for testunique in test_unique:
      if testunique in train_unique:
        test_replace_dict.update({testunique : train_replace_dict[testunique]})
      else:
        test_replace_dict.update({testunique : 0})
    
#     pworcolumn = column + '_por2'
#     mdf_train[pworcolumn] = mdf_train[column].copy()
#     mdf_test[pworcolumn] = mdf_test[column].copy()
    
    #replace the string labels with the ordinal integer encodings
    mdf_train[pworcolumn] = mdf_train[pworcolumn].replace(train_replace_dict)
    mdf_test[pworcolumn] = mdf_test[pworcolumn].replace(test_replace_dict)
    
    #delete the temporary support column
    del mdf_train[negtempcolumn]    
    del mdf_test[negtempcolumn]    
    
#     del mdf_train[column]    
#     del mdf_test[column]
    
#     mdf_train[column] = mdf_train[column + '_temp'].copy()
#     mdf_test[column] = mdf_test[column + '_temp'].copy()

#     del mdf_train[column + '_temp']    
#     del mdf_test[column + '_temp']
        
    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []
    
    powercolumns = [pworcolumn]
    
    #new driftreport metric ordl_activations_dict
    #maps each encoding found in the train set to the ratio of train set rows with that encoding
    ordl_activations_dict = {}
    for unique in mdf_train[pworcolumn].unique():
      sumcalc = (mdf_train[pworcolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[pworcolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #inverse_train_replace_dict maps the integer encodings back to the labels
    #activations_list is the list of its keys, i.e. the integer encodings
    inverse_train_replace_dict = {value:key for key,value in train_replace_dict.items()}
    activations_list = list(inverse_train_replace_dict)
    
    for pc in powercolumns:

      powernormalization_dict = {pc : {'train_replace_dict' : train_replace_dict, \
                                       'inverse_train_replace_dict' : inverse_train_replace_dict, \
                                       'activations_list' : activations_list, \
                                       'test_replace_dict' : test_replace_dict, \
                                       'ordl_activations_dict' : ordl_activations_dict, \
                                       'negvalues' : negvalues}}
    
      column_dict = {pc : {'category' : 'pwor', \
                           'origcategory' : category, \
                           'normalization_dict' : powernormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : powercolumns, \
                           'categorylist' : powercolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
        
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def process_bins_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_bins_class(.)
    #one hot encoded bins of a numerical set, where bin boundaries are
    #in increments of one standard deviation after z-score normalization
    #e.g. for the default of 6 bins the boundaries are at -2, -1, 0, 1, 2
    #with the first and last bins unbounded on their outer side
    
    #bins will be intended for a raw set that is not normalized
    #bint will be intended for a previously normalized set
    
    #params accepted:
    #  'bincount'        - integer number of bins, defaults to 6
    #                      if odd the center bin straddles the mean
    #                      if even the center two bins straddle the mean
    #                      if not > 0 the data is returned unchanged with an empty column_dict_list
    #  'normalizedinput' - defaults to False, any other value signals the data is
    #                      known to be z-score normalized so mean 0 and
    #                      standard deviation 1 are assumed without being measured
    
    #non-numeric entries receive infill of the train set mean
    #suffix appender is '_bins_#' where # is integer for bin id
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    bincount = params.get('bincount', 6)
    
    #if data is known to be z-score normalized we'll reduce the computational overhead
    normalizedinput = params.get('normalizedinput', False)
    
    #support column, deleted again after the encoding
    binscolumn = column + '_bins'
    
    #without a positive bincount there is nothing to encode
    if not (bincount > 0):
      return mdf_train, mdf_test, []
    
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()
    
    #entries that can't be read as numeric become NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
    
    measure_stats = normalizedinput is False
    
    if measure_stats:
      
      #mean and standard deviation are measured on the train set
      mean = mdf_train[binscolumn].mean()
      if mean != mean:
        mean = 0
      
      #a standard deviation of 0 or NaN is replaced with 1, which avoids division by 0
      std = mdf_train[binscolumn].std()
      if std == 0 or std != std:
        std = 1
    
    else:
      mean, std = 0, 1
    
    #infill is the train set mean
    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)
    
    if measure_stats:
      
      #z-score normalization based on the train set properties
      mdf_train[binscolumn] = (mdf_train[binscolumn] - mean) / std
      mdf_test[binscolumn] = (mdf_test[binscolumn] - mean) / std
    
    #bin boundaries: the bincount - 1 interior cuts are spaced one apart and
    #centered on 0, with the outer bins extending to -inf / +inf
    firstcut = - (bincount - 2) / 2
    bincuts = [-float('inf')] \
              + [firstcut + step for step in range(bincount - 1)] \
              + [float('inf')]
    
    #bin ids as strings
    binlabels = [str(binid) for binid in range(bincount)]
    
    #assign each entry to its bin
    mdf_train[binscolumn] = \
    pd.cut( mdf_train[binscolumn], bins = bincuts,  \
           labels = binlabels, precision=4)
    mdf_test[binscolumn] = \
    pd.cut( mdf_test[binscolumn], bins = bincuts,  \
           labels = binlabels, precision=4)
    
    #returned column headers
    textcolumns = [binscolumn + '_' + binlabel for binlabel in binlabels]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
    
    #one hot encode the bin assignments into the textcolumns
    mdf_train = \
    self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    
    #change data type for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
    
    #delete the support column
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]
    
    #one column_dict per returned column
    column_dict_list = []
    
    for textcolumn in textcolumns:
      
      #driftreport metric: ratio of train set rows with an activation in this column
      tcratio = mdf_train[textcolumn].sum() / mdf_train[textcolumn].shape[0]
      
      binsnormalization_dict = {textcolumn : {'bincuts' : bincuts, \
                                              'binlabels' : binlabels, \
                                              'binscolumns' : textcolumns, \
                                              'bincount' : bincount, \
                                              'binsmean' : mean, \
                                              'binsstd' : std, \
                                              'normalizedinput' : normalizedinput, \
                                              textcolumn + '_ratio' : tcratio}}
      
      column_dict_list.append(
        { textcolumn : {'category' : 'bins', \
                        'origcategory' : category, \
                        'normalization_dict' : binsnormalization_dict, \
                        'origcolumn' : column, \
                        'inputcolumn' : column, \
                        'columnslist' : textcolumns, \
                        'categorylist' : textcolumns, \
                        'infillmodel' : False, \
                        'infillcomplete' : False, \
                        'suffixoverlap_results' : suffixoverlap_results, \
                        'deletecolumn' : False}}
      )
    
    return mdf_train, mdf_test, column_dict_list

  def process_bsor_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bsor bins a numerical set by standard deviation increments measured from
    #the training set mean, returning the bin id in a single ordinal encoded column
    #(comparable to bins, but with an ordinal encoded return column)
    
    #accepts parameters:
    #'inplace' as boolean, True renames the received column in place of copying it,
    #defaults to False
    #'bincount' as integer for number of bins, defaults to 6
    #(ie z-score bins of <-2, -2:-1, -1:0, 0:1, 1:2, >2)
    #where if bincount is an odd number the center bin straddles the mean
    #and if bincount is even the center two bins straddle the mean
    #a bincount that is not > 0 returns the received sets with an empty column_dict_list
    #'normalizedinput' as boolean, True signals data is already z-score normalized
    #so mean = 0 and std = 1 are applied without derivation, defaults to False
    
    #missing or non-numeric entries receive infill with the training set mean
    #suffix appender is '_bsor'
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #collect parameters, falling back to defaults
    inplace = params.get('inplace', False)
    bincount = params.get('bincount', 6)
    normalizedinput = params.get('normalizedinput', False)
    
    binscolumn = column + '_bsor'
    
    #without a positive bincount there is nothing to encode
    if not bincount > 0:
      return mdf_train, mdf_test, []
    
    if inplace is True:
      
      #the source column is renamed, so just confirm the new header is available
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)
      
    else:
      
      #the source column is retained, transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
      
      mdf_test[binscolumn] = mdf_test[column].copy()
    
    #anything that can't be parsed as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')
    
    #only derive (and later apply) normalization when input isn't pre-normalized
    derive_stats = normalizedinput is False
    
    if derive_stats:
      
      #training set mean, with plug value for an all-NaN column
      mean = mdf_train[binscolumn].mean()
      if mean != mean:
        mean = 0
      
      #training set standard deviation, with plug value for zero or NaN
      std = mdf_train[binscolumn].std()
      if std == 0:
        std = 1
      if std != std:
        std = 1
      
    else:
      
      mean = 0
      std = 1
    
    #infill missing entries with the training set mean
    for df in (mdf_train, mdf_test):
      df[binscolumn] = df[binscolumn].fillna(mean)
    
    if derive_stats:
      
      #z-score normalization based on training set properties
      for df in (mdf_train, mdf_test):
        df[binscolumn] = (df[binscolumn] - mean) / std
    
    #bin boundaries are spaced one standard deviation apart and centered on zero,
    #with the outermost bins open-ended
    lowest_cut = - (bincount - 2) / 2
    
    bincuts = [-float('inf')] \
              + [lowest_cut + offset for offset in range(bincount - 1)] \
              + [float('inf')]
    
    binlabels = list(range(bincount))
    
    #replace the normalized values with their integer bin label
    mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn], bins = bincuts,
                                   labels = binlabels, precision=4)
    mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn], bins = bincuts,
                                  labels = binlabels, precision=4)
    
    #string form of each label mapped to the integer encoding
    ordinal_dict = {str(binlabel) : binlabel for binlabel in binlabels}
    
    #driftreport metric: share of training rows found in each bin
    row_count = mdf_train[binscolumn].shape[0]
    ordl_activations_dict = {}
    for key, encoding in ordinal_dict.items():
      ordl_activations_dict[key] = (mdf_train[binscolumn] == encoding).sum() / row_count
    
    inverse_ordinal_dict = {encoding : key for key, encoding in ordinal_dict.items()}
    activations_list = list(inverse_ordinal_dict)
    
    #categoric return from cut is converted to integer dtype
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.int8)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.int8)
    
    #list of returned columns
    nmbrcolumns = [binscolumn]
    
    #populate the data structure entries for each returned column
    column_dict_list = []
    
    for nc in nmbrcolumns:
      
      normalization_entry = {'ordinal_dict' : ordinal_dict,
                             'inverse_ordinal_dict' : inverse_ordinal_dict,
                             'activations_list' : activations_list,
                             'ordl_activations_dict' : ordl_activations_dict,
                             'binsmean' : mean,
                             'binsstd' : std,
                             'normalizedinput' : normalizedinput,
                             'bincount' : bincount,
                             'bincuts' : bincuts,
                             'binlabels' : binlabels}
      
      column_entry = {'category' : 'bsor',
                      'origcategory' : category,
                      'normalization_dict' : {nc : normalization_entry},
                      'origcolumn' : column,
                      'inputcolumn' : column,
                      'columnslist' : nmbrcolumns,
                      'categorylist' : nmbrcolumns,
                      'infillmodel' : False,
                      'infillcomplete' : False,
                      'suffixoverlap_results' : suffixoverlap_results,
                      'deletecolumn' : False}
      
      column_dict_list.append({nc : column_entry})
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_bnwd_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bnwd segments a numerical set into equal width bins
    #and returns the bins as a one-hot encoded set
    
    #accepts parameters:
    #'width' for the width of each bin, defaults to 1
    #'suffix' for the suffix appender of the returned columns, defaults to '_bnwd'
    
    #bin boundaries start from the training set minimum, with the first and last
    #bins open-ended so that test set values out of range are still assigned a bin
    #missing or non-numeric entries receive infill with the training set mean
    
    #columns are only returned for bins with activations in the training set
    #returned column headers follow form column + suffix + '_' + width + '_' + bin number
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #collect parameters, falling back to defaults
    bn_width = params.get('width', 1)
    suffix = params.get('suffix', '_bnwd')
    
    binscolumn = column + suffix
    
    #transform is applied to a copy so that the source column is retained
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()
    
    #anything that can't be parsed as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')
    
    #training set mean serves as infill, with plug value for an all-NaN column
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0
    
    for df in (mdf_train, mdf_test):
      df[binscolumn] = df[binscolumn].fillna(mean)
    
    #training set range determines how many bins of the given width are needed
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    if bn_delta == 0:
      bn_delta = 1
    bn_count = int(np.ceil(bn_delta / bn_width))
    
    #bin labels carry the width and the bin number
    bins_id = [str(bn_width) + '_' + str(i) for i in range(bn_count)]
    
    #interior boundaries at width increments above the minimum, open-ended at both sides
    bins_cuts = [-float('inf')] \
                + [bn_min + (i+1) * bn_width for i in range(bn_count-1)] \
                + [float('inf')]
    
    #replace values with the label of their equal width bin
    cut_precision = len(str(bn_count))
    mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn], bins = bins_cuts,
                                   labels = bins_id, precision = cut_precision)
    mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn], bins = bins_cuts,
                                  labels = bins_id, precision = cut_precision)
    
    #returned columns are based on the bins found in the training set
    foundinset = mdf_train[binscolumn].unique()
    
    #sorted since postprocess_textsupport_class will return columns in alphabetical order
    textcolumns = sorted(binscolumn + '_' + str(found) for found in foundinset)
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
    
    #one-hot encode the bin labels as a categorical set
    textsupport_params = {'textcolumns':textcolumns}
    mdf_train = \
    self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', textsupport_params)
    mdf_test = \
    self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    
    #smaller integer dtype for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
    
    #the support column holding the bin labels is no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]
    
    #populate the data structure entries for each returned column
    column_dict_list = []
    
    for nc in textcolumns:
      
      #driftreport metric: share of training rows activated in this column
      tc_ratio = nc + '_ratio'
      tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]
      
      normalization_entry = {'suffix' : suffix,
                             'binsmean' : mean,
                             'bn_min' : bn_min,
                             'bn_max' : bn_max,
                             'bn_delta' : bn_delta,
                             'bn_count' : bn_count,
                             'bins_id' : bins_id,
                             'bins_cuts' : bins_cuts,
                             'bn_width_bnwd' : bn_width,
                             'textcolumns' : textcolumns,
                             tc_ratio : tcratio}
      
      column_entry = {'category' : 'bnwd',
                      'origcategory' : category,
                      'normalization_dict' : {nc : normalization_entry},
                      'origcolumn' : column,
                      'inputcolumn' : column,
                      'columnslist' : textcolumns,
                      'categorylist' : textcolumns,
                      'infillmodel' : False,
                      'infillcomplete' : False,
                      'suffixoverlap_results' : suffixoverlap_results,
                      'deletecolumn' : False}
      
      column_dict_list.append({nc : column_entry})
    
    return mdf_train, mdf_test, column_dict_list

  def process_bnwo_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bnwo segments a numerical set into equal width bins
    #and returns the bin number in a single ordinal encoded column
    
    #accepts parameters:
    #'inplace' as boolean, True renames the received column in place of copying it,
    #defaults to False
    #'width' for the width of each bin, defaults to 1
    #'suffix' for the suffix appender of the returned column, defaults to '_bnwo'
    
    #bin boundaries start from the training set minimum, with the first and last
    #bins open-ended so that test set values out of range are still assigned a bin
    #missing or non-numeric entries receive infill with the training set mean
    
    #segments without activations are included in the encoding space
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #collect parameters, falling back to defaults
    inplace = params.get('inplace', False)
    bn_width = params.get('width', 1)
    suffix = params.get('suffix', '_bnwo')
    
    binscolumn = column + suffix
    
    if inplace is True:
      
      #the source column is renamed, so just confirm the new header is available
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)
      
    else:
      
      #the source column is retained, transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
      
      mdf_test[binscolumn] = mdf_test[column].copy()
    
    #anything that can't be parsed as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')
    
    #training set mean serves as infill, with plug value for an all-NaN column
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0
    
    for df in (mdf_train, mdf_test):
      df[binscolumn] = df[binscolumn].fillna(mean)
    
    #training set range determines how many bins of the given width are needed
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    if bn_delta == 0:
      bn_delta = 1
    bn_count = int(np.ceil(bn_delta / bn_width))
    
    #bin labels are the integer bin numbers
    bins_id = list(range(bn_count))
    
    #interior boundaries at width increments above the minimum, open-ended at both sides
    bins_cuts = [-float('inf')] \
                + [bn_min + (i+1) * bn_width for i in range(bn_count-1)] \
                + [float('inf')]
    
    #replace values with the number of their equal width bin
    cut_precision = len(str(bn_count))
    mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn], bins = bins_cuts,
                                   labels = bins_id, precision = cut_precision)
    mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn], bins = bins_cuts,
                                  labels = bins_id, precision = cut_precision)
    
    #categoric return from cut is converted to integer dtype
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(int)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(int)
    
    #list of returned columns
    nmbrcolumns = [binscolumn]
    
    #driftreport metric: share of training rows for each encoding found in train set
    ordl_activations_dict = {}
    for encoding in mdf_train[binscolumn].unique():
      matches = (mdf_train[binscolumn] == encoding).sum()
      ordl_activations_dict[encoding] = matches / mdf_train[binscolumn].shape[0]
    
    activations_list = list(ordl_activations_dict)
    
    #populate the data structure entries for each returned column
    column_dict_list = []
    
    for nc in nmbrcolumns:
      
      normalization_entry = {'suffix' : suffix,
                             'binsmean' : mean,
                             'bn_min' : bn_min,
                             'bn_max' : bn_max,
                             'bn_delta' : bn_delta,
                             'bn_count' : bn_count,
                             'bins_id' : bins_id,
                             'bins_cuts' : bins_cuts,
                             'bn_width' : bn_width,
                             'activations_list' : activations_list,
                             'ordl_activations_dict' : ordl_activations_dict}
      
      column_entry = {'category' : 'bnwo',
                      'origcategory' : category,
                      'normalization_dict' : {nc : normalization_entry},
                      'origcolumn' : column,
                      'inputcolumn' : column,
                      'columnslist' : nmbrcolumns,
                      'categorylist' : nmbrcolumns,
                      'infillmodel' : False,
                      'infillcomplete' : False,
                      'suffixoverlap_results' : suffixoverlap_results,
                      'deletecolumn' : False}
      
      column_dict_list.append({nc : column_entry})
    
    return mdf_train, mdf_test, column_dict_list

  def process_bnep_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bnep segments a numerical set into equal population bins
    #and returns the bins as a one-hot encoded set
    
    #accepts parameters:
    #'bincount' as integer for the target number of bins, defaults to 5
    #(fewer bins are returned when quantile boundaries in the training set coincide)
    #'suffix' for the suffix appender of the returned columns, defaults to '_bnep'
    
    #bin boundaries are derived from training set quantiles, with the first and last
    #bins open-ended so that test set values out of range are still assigned a bin
    
    #missing or non-numeric entries are not infilled prior to binning so are not
    #assigned a bin (default infill is to have no activations in a row)
    
    #when the training set has no spread between min and max (or is all NaN)
    #a single column of zeros is returned with header column + suffix
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #collect parameters, falling back to defaults
    bincount = params.get('bincount', 5)
    suffix = params.get('suffix', '_bnep')
    
    binscolumn = column + suffix
    
    #transform is applied to a copy so that the source column is retained
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()
    
    #anything that can't be parsed as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')
    
    #training set mean is recorded, with plug value for an all-NaN column
    #(note it is not applied as infill in this transform)
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0
    
    #training set range
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    
    #binning requires a training set with some spread and a non-NaN minimum
    binnable = bn_delta > 0 and bn_min == bn_min
    
    if binnable:
      
      #equal population intervals found in the train set
      quantile_intervals = \
      pd.qcut(mdf_train[binscolumn].values, bincount, duplicates='drop').unique()
      
      #drop any nan entry and put intervals in ascending order
      ordered_intervals = sorted([found for found in quantile_intervals if found == found])
      
      #replacements for the outermost intervals, open-ended at negative / positive end
      open_bottom = pd.Interval(-np.inf, ordered_intervals[0].right, closed='right')
      open_top = pd.Interval(ordered_intervals[-1].left, np.inf, closed='right')
      
      #swap in the open-ended intervals
      #(bottom is assigned last so it takes precedence when there is a single interval)
      newinterval_list = list(ordered_intervals)
      newinterval_list[-1] = open_top
      newinterval_list[0] = open_bottom
      
      #boundaries for the cut operation are the left edges plus a final positive infinity
      bins_cuts = [interval.left for interval in newinterval_list]
      bins_cuts.append(np.inf)
      
      bn_count = len(newinterval_list)
      
      #bin labels are the bin numbers as strings
      bins_id = [str(i) for i in range(bn_count)]
      
      #replace values with the label of their equal population bin
      cut_precision = len(str(bn_count))
      mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn], bins = bins_cuts,
                                     labels = bins_id, precision = cut_precision,
                                     duplicates='drop')
      mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn], bins = bins_cuts,
                                    labels = bins_id, precision = cut_precision,
                                    duplicates='drop')
      
      #returned columns are based on the bins found in the training set, excluding nan
      foundinset = mdf_train[binscolumn].unique()
      
      #sorted since postprocess_textsupport_class will return columns in alphabetical order
      textcolumns = sorted(binscolumn + '_' + str(found) for found in foundinset if found == found)
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
      
      #one-hot encode the bin labels as a categorical set
      mdf_train = \
      self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      mdf_test = \
      self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      
      #smaller integer dtype for memory savings
      for textcolumn in textcolumns:
        mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
        mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
      
      #the support column holding the bin labels is no longer needed
      del mdf_train[binscolumn]
      del mdf_test[binscolumn]
      
    else:
      
      #no basis for bins, so return a single column of zeros
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0
      
      textcolumns = [binscolumn]
      
      bn_count = bincount
      bins_id = False
      bins_cuts = False
    
    #populate the data structure entries for each returned column
    column_dict_list = []
    
    for nc in textcolumns:
      
      #driftreport metric: share of training rows activated in this column
      tc_ratio = nc + '_ratio'
      tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]
      
      normalization_entry = {'suffix' : suffix,
                             'binsmean' : mean,
                             'bn_min' : bn_min,
                             'bn_max' : bn_max,
                             'bn_delta' : bn_delta,
                             'bn_count' : bn_count,
                             'bins_id' : bins_id,
                             'bins_cuts' : bins_cuts,
                             'bincount_bnep' : bincount,
                             'textcolumns' : textcolumns,
                             tc_ratio : tcratio}
      
      column_entry = {'category' : 'bnep',
                      'origcategory' : category,
                      'normalization_dict' : {nc : normalization_entry},
                      'origcolumn' : column,
                      'inputcolumn' : column,
                      'columnslist' : textcolumns,
                      'categorylist' : textcolumns,
                      'infillmodel' : False,
                      'infillcomplete' : False,
                      'suffixoverlap_results' : suffixoverlap_results,
                      'deletecolumn' : False}
      
      column_dict_list.append({nc : column_entry})
    
    return mdf_train, mdf_test, column_dict_list

  def process_bneo_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bneo segments a numerical set into equal population bins
    #and returns the bin number in a single ordinal encoded column
    
    #accepts parameters:
    #'inplace' as boolean, True renames the received column in place of copying it,
    #defaults to False
    #'bincount' as integer for the target number of bins, defaults to 5
    #(fewer bins are returned when quantile boundaries in the training set coincide)
    #'suffix' for the suffix appender of the returned column, defaults to '_bneo'
    
    #bin boundaries are derived from training set quantiles, with the first and last
    #bins open-ended so that test set values out of range are still assigned a bin
    
    #default infill is adjacent cell infill, meaning missing or non-numeric entries
    #take the bin of the preceding row, falling back to the following row
    #and then to a plug value of 0
    
    #when the training set has no spread between min and max (or is all NaN)
    #the returned column is populated with all zeros
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #collect parameters, falling back to defaults
    inplace = params.get('inplace', False)
    bincount = params.get('bincount', 5)
    suffix = params.get('suffix', '_bneo')
    
    binscolumn = column + suffix
    
    if inplace is not True:
      
      #the source column is retained, transform is applied to a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
      
      mdf_test[binscolumn] = mdf_test[column].copy()
      
    else:
      
      #the source column is renamed, so just confirm the new header is available
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)
    
    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
    
    #training set mean is recorded, with plug value for an all-NaN column
    #(note it is not applied as infill in this transform)
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0
    
    #evaluate train set for transformation parameters
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    
    #binning requires a training set with some spread and a non-NaN minimum
    if bn_delta > 0 and bn_min == bn_min:
      
      #grab the intervals using qcut based on equal population in train set
      intervalset = pd.qcut(mdf_train[binscolumn].values, bincount, duplicates='drop').unique()
      
      #note we're sorting here, and scrubbing any nan
      intervalset = sorted([interval for interval in intervalset if interval == interval])
      
      #replacements for the outermost intervals, open-ended at negative / positive end
      firstinterval = pd.Interval(-np.inf, intervalset[0].right, closed='right')
      lastinterval = pd.Interval(intervalset[-1].left, np.inf, closed='right')
      
      #swap in the open-ended intervals
      #(first is assigned last so it takes precedence when there is a single interval)
      newinterval_list = list(intervalset)
      newinterval_list[-1] = lastinterval
      newinterval_list[0] = firstinterval
      
      #boundaries for the cut operation are the left edges plus a final positive infinity
      bins_cuts = [interval.left for interval in newinterval_list]
      bins_cuts.append(np.inf)
      
      bn_count = len(newinterval_list)
      
      #bin labels are the integer bin numbers
      bins_id = list(range(bn_count))
      
      #create bins based on prepared increments
      mdf_train[binscolumn] = \
      pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')
      mdf_test[binscolumn] = \
      pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')
      
      #apply ffill to replace NArows with value from adjacent cell in preceding row
      #(ffill / bfill methods are used since fillna(method=...) is deprecated in pandas)
      mdf_train[binscolumn] = mdf_train[binscolumn].ffill()
      mdf_test[binscolumn] = mdf_test[binscolumn].ffill()
      
      #we'll follow with a bfill just in case first row had a nan
      mdf_train[binscolumn] = mdf_train[binscolumn].bfill()
      mdf_test[binscolumn] = mdf_test[binscolumn].bfill()
      
      #and if the entire set was nan we'll infill with a 0 plug
      mdf_train[binscolumn] = mdf_train[binscolumn].fillna(0)
      mdf_test[binscolumn] = mdf_test[binscolumn].fillna(0)
      
      #categoric return from cut is converted to integer dtype
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(int)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(int)
      
    else:
      
      #no basis for bins, so populate the column with zeros
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0
      
      bn_count = bincount
      bins_id = False
      bins_cuts = False
    
    #create list of columns
    nmbrcolumns = [binscolumn]
    
    #driftreport metric: share of training rows for each encoding found in train set
    ordl_activations_dict = {}
    for encoding in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == encoding).sum()
      ordl_activations_dict[encoding] = sumcalc / mdf_train[binscolumn].shape[0]
    
    #populate the data structure entries for each returned column
    column_dict_list = []
    
    for nc in nmbrcolumns:
      
      nmbrnormalization_dict = {nc : {'suffix' : suffix, \
                                      'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'activations_list' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bincount' : bincount, \
                                      'ordl_activations_dict' : ordl_activations_dict}}
      
      column_dict = { nc : {'category' : 'bneo', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : nmbrcolumns, \
                            'categorylist' : nmbrcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}
      
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def process_tlbn_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating equal population bins coresponding to 
    #parameter 'bincount' which defaults to 9
    #and returning in one-hot encoded set
    
    #how this differs from bnep in that the activated bins are replaced with
    #min-max scaling for source column values found in that bin, and then other values as -1
    
    #note that for the bottom bin order reversed to accomodate subsequent values out of range 
    #and still use -1 register
    
    #default infill is to have no activations in a row
    
    #can be applied top either a raw set not yet normalized or after normalization
    #such as after z-score normalization)
    '''
    
    suffixoverlap_results = {}
    
    if 'bincount' in params:
        
      bincount = params['bincount']
    
    else:
      
      bincount = 9

    binscolumn = column + '_tlbn'

    #copy original column
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #get mean of training data
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0

#     #replace missing data with training set mean
#     mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
#     mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)

    #evaluate train set for transformation parameters
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
#     if bn_delta == 0:
#       bn_delta = 1
#     bn_count = int(np.ceil(bn_delta / bn_width))

    if bn_delta > 0 and bn_min == bn_min:

      #grab the intervals using qcut based on equal population in train set
      intervalset = pd.qcut(mdf_train[binscolumn].values, bincount, duplicates='drop').unique()

      #note we're sorting here, and scrubbing any nan
      intervalset = sorted([interval for interval in intervalset if interval == interval])

      #we'll make the bottom interval open-ended at negative end
      firstinterval = pd.Interval(-np.inf, intervalset[0].right, closed='right')

      #and the last interval open-ended at positive end
      lastinterval = pd.Interval(intervalset[-1].left, np.inf, closed='right')

      #now create a list to apply in cut operatoin
      newinterval_list = []

      #now we'll assemble a list of intervals to prepare for the cut operation, 
      #replacing first and last with the open-ended
      for i in range(len(intervalset)):
        if i == 0:
          newinterval_list.append(firstinterval)
        elif i == len(intervalset)-1:
          newinterval_list.append(lastinterval)
        else:
          newinterval_list.append(intervalset[i])

      #now translate intervals to the list of boundaries for cut operatoin
      cutintervals = []

      for interval in newinterval_list:

        cutintervals.append(interval.left)

      cutintervals.append(np.inf)

      bn_count = len(newinterval_list)

      bins_id = []
      for i in range(bn_count):
        bins_id.append(str(i))

      bins_cuts = cutintervals

      #create bins based on standard deviation increments
  #     binscolumn = column + '_bnwd'
      mdf_train[binscolumn] = \
      pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')
      mdf_test[binscolumn] = \
      pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')

      foundinset = mdf_train[binscolumn].unique()

      textcolumns = []
      for i in foundinset:
        if i == i:
          textcolumns.append(binscolumn + '_' + str(i))

      #postprocess_textsupport_class will return columns in alphabetical order
      textcolumns.sort()
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

      #process bins as a categorical set
      mdf_train = \
      self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      mdf_test = \
      self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      
      #initialize binscolumn once more
      mdf_train[binscolumn] = mdf_train[column].copy()
      mdf_test[binscolumn] = mdf_test[column].copy()
      
      mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
      mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
      
      if len(textcolumns) > 1:

        #for i in range(bincount):
        for i in range(len(textcolumns)):

          tlbn_column = binscolumn + '_' + str(i)

          if i == 0:

            mdf_train[tlbn_column] = \
            np.where(mdf_train[tlbn_column] == 1, \
                    (bins_cuts[i+1] - mdf_train[binscolumn]) / (bins_cuts[i+1] - bn_min), -1)

            mdf_test[tlbn_column] = \
            np.where(mdf_test[tlbn_column] == 1, \
                    (bins_cuts[i+1] - mdf_test[binscolumn]) / (bins_cuts[i+1] - bn_min), -1)

          elif i == bincount - 1:

            mdf_train[tlbn_column] = \
            np.where(mdf_train[tlbn_column] == 1, \
                    (mdf_train[binscolumn] - bins_cuts[i]) / (bn_max - bins_cuts[i]), -1)

            mdf_test[tlbn_column] = \
            np.where(mdf_test[tlbn_column] == 1, \
                    (mdf_test[binscolumn] - bins_cuts[i]) / (bn_max - bins_cuts[i]), -1)

          else:

            mdf_train[tlbn_column] = \
            np.where(mdf_train[tlbn_column] == 1, \
                    (mdf_train[binscolumn] - bins_cuts[i]) / (bins_cuts[i+1] - bins_cuts[i]), -1)

            mdf_test[tlbn_column] = \
            np.where(mdf_test[tlbn_column] == 1, \
                    (mdf_test[binscolumn] - bins_cuts[i]) / (bins_cuts[i+1] - bins_cuts[i]), -1)

#       #change data type for memory savings
#       for textcolumn in textcolumns:
#         mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
#         mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)

      #delete the support column
      del mdf_train[binscolumn]
      del mdf_test[binscolumn]
      
      
    else:
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0
      
      textcolumns = [binscolumn]
      
      bn_count = bincount
      bins_id = False
      bins_cuts = False

    #nmbrnormalization_dict = {'mean' : mean, 'std' : std}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in textcolumns:

      #new parameter collected for driftreport
      tc_ratio = nc + '_ratio'
      tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bincount_tlbn' : bincount, \
                                      'textcolumns' : textcolumns, \
                                      tc_ratio : tcratio}}

      column_dict = { nc : {'category' : 'tlbn', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : textcolumns, \
                            'categorylist' : textcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
       
    return mdf_train, mdf_test, column_dict_list
  
  def process_bkt1_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #one-hot encodes a numeric column into bins with user specified boundaries
    #boundaries are read from parameter 'buckets', which defaults to [0,1,2]
    #-inf and +inf are added as outer boundaries, so first and last buckets are unconstrained
    #bins are closed on the right, which is the pd.cut default
    
    #non-numeric entries are coerced to NaN prior to binning
    #a column is only returned for buckets with activations in the train set
    
    #returned columns are named column + '_bkt1_' + (bucket number)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #bucket boundaries, with default applied when not passed by user
    buckets = params['buckets'] if 'buckets' in params else [0,1,2]
      
    binscolumn = column + '_bkt1'

    #the support column starts as a copy of the source column
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()

    #anything that doesn't parse as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is recorded in the normalization_dict, 0 is substituted for a NaN mean
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    #boundaries for the cut operation, a copy is extended so that the passed list is left intact
    bins_cuts = buckets.copy()
    bins_cuts.insert(0, -np.inf)
    bins_cuts.append(np.inf)
    
    #one integer label per interval
    bins_id = list(range(len(bins_cuts)-1))
    label_precision = len(str(len(bins_id)))
    
    #apply the binning to both sets
    for df in (mdf_train, mdf_test):
      df[binscolumn] = \
      pd.cut(df[binscolumn], bins = bins_cuts, labels = bins_id, precision = label_precision)

    #column headers for labels found in train set, in alphabetical order
    #since postprocess_textsupport_class will return columns in alphabetical order
    textcolumns = sorted(binscolumn + '_' + str(label) for label in mdf_train[binscolumn].unique())
    
    #scrub the nan header, present when entries were not assigned to a bucket
    textcolumns = [header for header in textcolumns if not header.endswith('nan')]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
    
    #one-hot encode the bin labels
    mdf_train = \
    self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    
    #int8 for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
    
    #support column no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]

    #assemble one column_dict per returned column
    column_dict_list = []

    for nc in textcolumns:
      
      #ratio of activations in train set, collected for driftreport
      train_activations = mdf_train[nc]
      tcratio = train_activations.sum() / train_activations.shape[0]

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'buckets_bkt1' : buckets, \
                                      'bins_cuts' : bins_cuts, \
                                      'bins_id' : bins_id, \
                                      'textcolumns' : textcolumns, \
                                      nc + '_ratio' : tcratio}}

      column_dict_list.append({ nc : {'category' : 'bkt1', \
                                      'origcategory' : category, \
                                      'normalization_dict' : nmbrnormalization_dict, \
                                      'origcolumn' : column, \
                                      'inputcolumn' : column, \
                                      'columnslist' : textcolumns, \
                                      'categorylist' : textcolumns, \
                                      'infillmodel' : False, \
                                      'infillcomplete' : False, \
                                      'suffixoverlap_results' : suffixoverlap_results, \
                                      'deletecolumn' : False}})
       
    return mdf_train, mdf_test, column_dict_list
  
  def process_bkt2_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #one-hot encodes a numeric column into bins with user specified boundaries
    #boundaries are read from parameter 'buckets', which defaults to [0,1,2]
    #no outer boundaries are added, so first and last buckets are bounded
    #bins are closed on the right, which is the pd.cut default
    
    #non-numeric entries are coerced to NaN prior to binning
    #a column is only returned for buckets with activations in the train set
    
    #returned columns are named column + '_bkt2_' + (bucket number)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #bucket boundaries, with default applied when not passed by user
    buckets = params['buckets'] if 'buckets' in params else [0,1,2]
      
    binscolumn = column + '_bkt2'

    #the support column starts as a copy of the source column
    mdf_train, suffixoverlap_results = \
    self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()

    #anything that doesn't parse as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is recorded in the normalization_dict, 0 is substituted for a NaN mean
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    #boundaries for the cut operation are a copy of the passed buckets
    bins_cuts = buckets.copy()

    #one integer label per interval
    bins_id = list(range(len(bins_cuts)-1))
    label_precision = len(str(len(bins_id)))
    
    #apply the binning to both sets
    for df in (mdf_train, mdf_test):
      df[binscolumn] = \
      pd.cut(df[binscolumn], bins = bins_cuts, labels = bins_id, precision = label_precision)

    #column headers for labels found in train set, in alphabetical order
    #since postprocess_textsupport_class will return columns in alphabetical order
    textcolumns = sorted(binscolumn + '_' + str(label) for label in mdf_train[binscolumn].unique())
    
    #scrub the nan header, present when entries did not fall within the bucket range
    textcolumns = [header for header in textcolumns if not header.endswith('nan')]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
    
    #one-hot encode the bin labels
    mdf_train = \
    self.postprocess_textsupport_class(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self.postprocess_textsupport_class(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    
    #int8 for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
    
    #support column no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]

    #assemble one column_dict per returned column
    column_dict_list = []

    for nc in textcolumns:
      
      #ratio of activations in train set, collected for driftreport
      train_activations = mdf_train[nc]
      tcratio = train_activations.sum() / train_activations.shape[0]

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'buckets_bkt2' : buckets, \
                                      'bins_cuts' : bins_cuts, \
                                      'bins_id' : bins_id, \
                                      'textcolumns' : textcolumns, \
                                      nc + '_ratio' : tcratio}}

      column_dict_list.append({ nc : {'category' : 'bkt2', \
                                      'origcategory' : category, \
                                      'normalization_dict' : nmbrnormalization_dict, \
                                      'origcolumn' : column, \
                                      'inputcolumn' : column, \
                                      'columnslist' : textcolumns, \
                                      'categorylist' : textcolumns, \
                                      'infillmodel' : False, \
                                      'infillcomplete' : False, \
                                      'suffixoverlap_results' : suffixoverlap_results, \
                                      'deletecolumn' : False}})
       
    return mdf_train, mdf_test, column_dict_list
  
  def process_bkt3_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #ordinal encodes a numeric column into bins with user specified boundaries
    #boundaries are read from parameter 'buckets', which defaults to [0,1,2]
    #-inf and +inf are added as outer boundaries, so first and last buckets are unconstrained
    #bins are closed on the right, which is the pd.cut default
    
    #accepts parameter 'inplace', when passed as True the source column is renamed
    #to the returned column instead of being copied
    
    #non-numeric entries are coerced to NaN and receive the infill activation,
    #an integer one greater than the last bucket label
    
    #the recorded activations_list covers every bucket, including those without activations
    
    #the returned column is named column + '_bkt3'
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #parameters, with defaults applied when not passed by user
    inplace = params['inplace'] if 'inplace' in params else False
    buckets = params['buckets'] if 'buckets' in params else [0,1,2]
      
    binscolumn = column + '_bkt3'

    if inplace is True:

      #source column takes on the new header after checking the header is available
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    else:

      #source column is retained, the new column starts as a copy
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

    #anything that doesn't parse as a number becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is recorded in the normalization_dict, 0 is substituted for a NaN mean
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    #boundaries for the cut operation, a copy is extended so that the passed list is left intact
    bins_cuts = buckets.copy()
    bins_cuts.insert(0, -np.inf)
    bins_cuts.append(np.inf)
    
    #one integer label per interval
    bins_id = list(range(len(bins_cuts)-1))
    label_precision = len(str(len(bins_id)))

    #entries without a bucket get the next integer after the last label
    infill_activation = len(bins_cuts)-1
      
    for df in (mdf_train, mdf_test):
      
      binned = \
      pd.cut(df[binscolumn], bins = bins_cuts, labels = bins_id, precision = label_precision)
      
      #categoric labels are converted by way of float to support the infill, then to integers
      df[binscolumn] = binned.astype(float).fillna(infill_activation).astype(int)

    #the single returned column
    nmbrcolumns = [binscolumn]
    
    #driftreport metric ordl_activations_dict, ratio of train set rows for each activation found
    train_encoded = mdf_train[binscolumn]
    ordl_activations_dict = \
    {activation : (train_encoded == activation).sum() / train_encoded.shape[0] \
     for activation in train_encoded.unique()}

    nmbrnormalization_dict = {binscolumn : {'binsmean' : mean, \
                                            'buckets' : buckets, \
                                            'bins_cuts' : bins_cuts, \
                                            'bins_id' : bins_id, \
                                            'activations_list' : bins_id, \
                                            'infill_activation' : infill_activation, \
                                            'ordl_activations_dict' : ordl_activations_dict}}

    column_dict_list = [{ binscolumn : {'category' : 'bkt3', \
                                        'origcategory' : category, \
                                        'normalization_dict' : nmbrnormalization_dict, \
                                        'origcolumn' : column, \
                                        'inputcolumn' : column, \
                                        'columnslist' : nmbrcolumns, \
                                        'categorylist' : nmbrcolumns, \
                                        'infillmodel' : False, \
                                        'infillcomplete' : False, \
                                        'suffixoverlap_results' : suffixoverlap_results, \
                                        'deletecolumn' : False}}]

    return mdf_train, mdf_test, column_dict_list
  
  def process_bkt4_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #ordinal encodes a numeric column into bins with user specified boundaries
    #boundaries are read from parameter 'buckets', which defaults to [0,1,2]
    #no outer boundaries are added, so first and last buckets are bounded
    #bins are closed on the right, which is the pd.cut default
    #this assumes buckets was passed with sorted values
    
    #accepts parameter 'inplace', when passed as True the source column is renamed
    #to the returned column instead of being copied
    
    #non-numeric entries and entries outside of the range (buckets[0], buckets[-1]]
    #receive the infill activation, an integer one greater than the last bucket label
    
    #the recorded activations_list covers every bucket, including those without activations
    
    #the returned column is named column + '_bkt4'
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'buckets' in params:
        
      buckets = params['buckets']
    
    else:
      
      buckets = [0,1,2]
      
    binscolumn = column + '_bkt4'

    if inplace is not True:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

    else:

      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
    
    #set all values that fall outside of bounded buckets to nan
    #so that they don't contribute to the mean and later receive infill_activation
    #note Series.where is used in place of a masked .loc assignment of nan
    #since the .loc assignment relied on implicit upcasting of integer columns,
    #which is deprecated in recent pandas
    for df in (mdf_train, mdf_test):
      in_range = (df[binscolumn] > buckets[0]) & (df[binscolumn] <= buckets[-1])
      df[binscolumn] = df[binscolumn].where(in_range)

    #get mean of training data
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0
      
    #edge case, if mean does not fall within buckets range, it is reset to the top boundary
    #(such as when a NaN mean was replaced with 0 above)
    #this edge case specific to bkt4
    #this assumes buckets was passed with sorted values
    if mean < buckets[0] or mean > buckets[-1]:
      mean = buckets[-1]

    #assemble buckets, a copy so that the passed list is not shared with bins_cuts
    bins_cuts = buckets.copy()

    #create labels for bins
    bins_id = list(range(len(bins_cuts)-1))

    #entries without a bucket get the next integer after the last label
    infill_activation = len(bins_cuts)-1
      
    #create bins based on the bucket boundaries
    mdf_train[binscolumn] = \
    pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))
    mdf_test[binscolumn] = \
    pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))
    
    #categoric labels are converted to float to support the infill
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(float)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(float)
    
    #replace missing data with infill_activation
    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(infill_activation)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(infill_activation)
    
    #change column dtype
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(int)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(int)

    #create list of columns
    nmbrcolumns = [binscolumn]
    
    #new driftreport metric ordl_activations_dict
    #ratio of train set rows for each activation found
    ordl_activations_dict = {}
    for unique in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[binscolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'buckets' : buckets, \
                                      'bins_cuts' : bins_cuts, \
                                      'bins_id' : bins_id, \
                                      'activations_list' : bins_id, \
                                      'infill_activation' : infill_activation, \
                                      'ordl_activations_dict' : ordl_activations_dict}}

      column_dict = { nc : {'category' : 'bkt4', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : nmbrcolumns, \
                            'categorylist' : nmbrcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list

  def process_DPnb_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPnb_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is numeric data with z-score normalization to mean 0 and sigma 1
    #adds data sampled from normal distribution with mean 0 and sigma 0.06 by default
    #where noise is only injected to a subset of rows sampled based on flip_prob
    #the noise properties may be customized with parameters 'mu', 'sigma', 'flip_prob'
    #note that as implemented here flip_prob defaults to 1.0 when not passed in params,
    #meaning that every row of train data receives noise
    #the distribution may be selected with parameter 'noisedistribution'
    #which accepts 'normal' (the default) or 'laplace', other values raise a ValueError
    #note that the noise is only injected into the designated training data of df_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream applicaiton
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    
    #the returned column is named column + '_DPnb'
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'mu' in params:
      mu = params['mu']
    else:
      mu = 0.0
      
    if 'sigma' in params:
      sigma = params['sigma']
    else:
      sigma = 0.06
      
    if 'flip_prob' in params:
      flip_prob = params['flip_prob']
    else:
      flip_prob = 1.0
      
    if 'noisedistribution' in params:
      noisedistribution = params['noisedistribution']
    else:
      #can pass as 'normal' or 'laplace'
      noisedistribution = 'normal'
      
    DPnm_column = column + '_DPnb'
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, DPnm_column, suffixoverlap_results)
    
    rowcount = mdf_train.shape[0]
      
    #first we'll derive our sampled noise for injection
    if noisedistribution == 'normal':
      noise_samples = np.random.normal(loc=mu, scale=sigma, size=rowcount)
    elif noisedistribution == 'laplace':
      noise_samples = np.random.laplace(loc=mu, scale=sigma, size=rowcount)
    else:
      #an unsupported entry previously surfaced as an unbound variable error
      raise ValueError("noisedistribution parameter for DPnb must be 'normal' or 'laplace', received: " \
                       + str(noisedistribution))
      
    #1 for rows that will receive noise, 0 for rows left unaltered
    binomial_samples = np.random.binomial(n=1, p=flip_prob, size=rowcount)
    
    #now inject noise
    #the noise is kept as a numpy array so that the addition is applied by row position
    #(assigning a dataframe with a fresh range index would align on index labels,
    #resulting in NaN for any train set rows with labels outside of that range)
    mdf_train[DPnm_column] = mdf_train[column] + noise_samples * binomial_samples
    
    #for test data is just pass-through
    mdf_test[DPnm_column] = mdf_test[column]
    
    #create list of columns
    nmbrcolumns = [DPnm_column]

    nmbrnormalization_dict = {DPnm_column : {'mu' : mu, 'sigma' : sigma, 'flip_prob' : flip_prob, \
                                             'noisedistribution' : noisedistribution}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPnb', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def process_DPmm_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPmm_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is numeric data min-max scaled within range 0-1
    #adds data sampled from normal distribution with mean 0 and sigma 0.03 by default
    #the noise properties may be customized with parameters 'mu', 'sigma'
    #also accepts parameter 'flip_prob' for ratio of data that will be adjusted (defaults to 1.)
    #noise is scaled based on the recieved points to keep within range 0-1
    #(e.g. for recieved data point 0.1, noise is scaled so as not to fall below -0.1)
    #gaussian noise source is also capped to maintain the range -0.5 to 0.5 (rare outlier points)
    #note that the noise is only injected into the designated training data of df_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream applicaiton
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'mu' in params:
      mu = params['mu']
    else:
      mu = 0.0
      
    if 'sigma' in params:
      sigma = params['sigma']
    else:
      sigma = 0.03
      
    if 'flip_prob' in params:
      flip_prob = params['flip_prob']
    else:
      flip_prob = 1.
      
    if 'noisedistribution' in params:
      noisedistribution = params['noisedistribution']
    else:
      #can pass as 'normal' or 'laplace'
      noisedistribution = 'normal'
      
    DPmm_column = column + '_DPmm'
    DPmm_column_temp1 = column + '_DPmm' + '_tmp1'
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, [DPmm_column, DPmm_column_temp1], suffixoverlap_results)
      
    #first we'll derive our sampled noise for injection
    if noisedistribution == 'normal':
      normal_samples = np.random.normal(loc=mu, scale=sigma, size=(mdf_train.shape[0]))
    elif noisedistribution == 'laplace':
      normal_samples = np.random.laplace(loc=mu, scale=sigma, size=(mdf_train.shape[0]))
    binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(mdf_train.shape[0]))
    
    mdf_train[DPmm_column] = pd.DataFrame(normal_samples) * pd.DataFrame(binomial_samples)
    
    #cap outliers
    mdf_train[DPmm_column] = np.where(mdf_train[DPmm_column] < -0.5, np.nan, mdf_train[DPmm_column])
    mdf_train[DPmm_column] = np.where(mdf_train[DPmm_column] > 0.5, np.nan, mdf_train[DPmm_column])
    
    #adjacent cell infill
    mdf_train[DPmm_column] = mdf_train[DPmm_column].fillna(method='ffill')
    mdf_train[DPmm_column] = mdf_train[DPmm_column].fillna(method='bfill')
    
    #support column to signal sign of noise, 0 is neg, 1 is pos
    mdf_train[DPmm_column_temp1] = 0
    mdf_train[DPmm_column_temp1] = np.where(mdf_train[DPmm_column] >= 0., 1, mdf_train[DPmm_column_temp1])
    
    #now inject noise, with scaled noise to maintain range 0-1
    #(so if mnmx value <0.5, and neg noise, we scale noise to maintain ratio as if minmax was 0.5, similarly for >0.5 mnmx)
    mdf_train[DPmm_column] = np.where(mdf_train[column] < 0.5, \
                                      mdf_train[column] + \
                                      (1 - mdf_train[DPmm_column_temp1]) * (mdf_train[DPmm_column] * mdf_train[column] / 0.5) + \
                                      (mdf_train[DPmm_column_temp1]) * (mdf_train[DPmm_column]), \
                                      mdf_train[DPmm_column])
    
    mdf_train[DPmm_column] = np.where(mdf_train[column] >= 0.5, \
                                      mdf_train[column] + \
                                      (1 - mdf_train[DPmm_column_temp1]) * (mdf_train[DPmm_column]) + \
                                      (mdf_train[DPmm_column_temp1]) * (mdf_train[DPmm_column] * (1 - mdf_train[column]) / 0.5), \
                                      mdf_train[DPmm_column])
    
    #remove support column
    del mdf_train[DPmm_column_temp1]
    
    #for test data is just pass-through
    mdf_test[DPmm_column] = mdf_test[column]
    
    #create list of columns
    nmbrcolumns = [DPmm_column]

    nmbrnormalization_dict = {DPmm_column : {'mu' : mu, 'sigma' : sigma, 'flip_prob' : flip_prob, \
                                             'noisedistribution' : noisedistribution}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPmm', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def process_DPrt_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #process_DPrt_class
    #function to scale data as follows:
    
    # if max >= 0 and min <= 0:
    #   #scaling based on 
    #   x = x / (max - min)

    # elif max >= 0 and min >= 0:
    #   #traditional min/max
    #   x = (x - min) / (max - min)
    
    # elif max <= 0 and min <= 0:
    #   #max/min (retains negative values)
    #   x = (x - max) / (max - min)
    
    #followed by a noise injection similar to DPmm based on this set's retn range
    #noise is only injected to mdf_train, mdf_test just receives the scaling
    
    #replaces missing or improperly formatted data with mean of remaining values
    #(prior to noise injection)
    
    #returns same dataframes with new column of name column + '_DPrt'
    #along with a column_dict_list carrying the normalization_dict
    #note this is a "dualprocess" function since is applied to both dataframes
    
    #accepted parameters (all optional):
    #divisor: 'minmax' (default), 'mad' or 'std'
    #multiplier / offset: applied to posttransform values, multiplier applied before offset
    #cap / floor: based on pretransform values, True for train set max / min, 
    #             a number for a specific value, False (default) for none
    #adjinfill: True to apply adjacent cell infill in place of mean imputation
    #mu, sigma: noise distribution properties, default to 0.0 and 0.03
    #flip_prob: ratio of rows receiving noise, defaults to 1.
    #noisedistribution: 'normal' (default) or 'laplace'
    
    #raises ValueError if noisedistribution is passed as an unrecognized entry
    """
    
    suffixoverlap_results = {}
    
    #accepts divisor parameters of 'minmax' 'mad' or 'std', eg divisor for normalization equation
    #note that standard deviation doesn't have same properties for sign retention when all values > or < 0
    divisor = params.get('divisor', 'minmax')
    
    #offset is just an added constant applied after multiplier
    offset = params.get('offset', 0)
    
    #multiplier scales the set by multiplication prior to offset
    multiplier = params.get('multiplier', 1)
    
    #cap can be passed as True for max of training data or as a specific value prior to normalization, False for no cap
    cap = params.get('cap', False)
    
    #floor can be passed as True for min of training data or as a specific value prior to normalization, False for no floor
    floor = params.get('floor', False)
    
    #adjinfill accepts True/False to change default infill from mean imputation to adjacent cell
    adjinfill = params.get('adjinfill', False)
    
    #here are differential privacy parameters
    mu = params.get('mu', 0.0)
    sigma = params.get('sigma', 0.03)
    flip_prob = params.get('flip_prob', 1.)
    
    #can pass as 'normal' or 'laplace'
    noisedistribution = params.get('noisedistribution', 'normal')
    
    #validate up front, before any dataframe is edited
    #(an unrecognized entry would otherwise leave the sampled noise undefined)
    if noisedistribution not in ['normal', 'laplace']:
      raise ValueError("DPrt transform parameter 'noisedistribution' only accepts entries of 'normal' or 'laplace'")
    
    DPrt_column = column + '_DPrt'
    DPrt_column_temp1 = column + '_DPrt' + '_tmp1'
    DPrt_column_temp2 = column + '_DPrt' + '_tmp2'
    
    #the _tmp1 / _tmp2 headers are no longer written to the dataframe (noise is held in local arrays)
    #they are still passed to the overlap check so the reported suffixoverlap_results are unchanged
    newcolumns = [DPrt_column, DPrt_column_temp1, DPrt_column_temp2]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
    
    #copy source column into new column
    mdf_train[DPrt_column] = mdf_train[column].copy()
    mdf_test[DPrt_column] = mdf_test[column].copy()

    #convert all values to either numeric or NaN
    mdf_train[DPrt_column] = pd.to_numeric(mdf_train[DPrt_column], errors='coerce')
    mdf_test[DPrt_column] = pd.to_numeric(mdf_test[DPrt_column], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[DPrt_column].std()
    
    #mean absolute deviation of training data
    #derived directly since Series.mad() is not available in pandas >= 2.0
    mad = (mdf_train[DPrt_column] - mdf_train[DPrt_column].mean()).abs().mean()
    
    #get maximum value of training column
    maximum = mdf_train[DPrt_column].max()
    
    #get minimum value of training column
    minimum = mdf_train[DPrt_column].min()
    
    #avoid outlier div by zero when max = min (x != x tests for NaN)
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
      
    if std != std or std == 0:
      std = 1
      
    if mad != mad or mad == 0:
      mad = 1
      
    #when cap / floor passed as a specific value inside of the train set range
    #they replace the max / min used in the scaling
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor passed as True are based on train set max / min
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values > cap with cap
      mdf_train.loc[mdf_train[DPrt_column] > cap, (DPrt_column)] = cap
      mdf_test.loc[mdf_test[DPrt_column] > cap, (DPrt_column)] = cap
    
    if floor is not False:
      #replace values < floor with floor
      mdf_train.loc[mdf_train[DPrt_column] < floor, (DPrt_column)] = floor
      mdf_test.loc[mdf_test[DPrt_column] < floor, (DPrt_column)] = floor
      
    #get mean of training data (after cap / floor), used as the infill value
    mean = mdf_train[DPrt_column].mean()
    if mean != mean:
      mean = 0
    
    if adjinfill is True:
      #adjacent cell infill, forward fill then back fill for any leading NaN
      mdf_train[DPrt_column] = mdf_train[DPrt_column].ffill().bfill()
      mdf_test[DPrt_column] = mdf_test[DPrt_column].ffill().bfill()
    
    #replace (any remaining) missing data with training set mean
    mdf_train[DPrt_column] = mdf_train[DPrt_column].fillna(mean)
    mdf_test[DPrt_column] = mdf_test[DPrt_column].fillna(mean)
    
    #edge case (only neccesary so scalingapproach is assigned)
    if maximum != maximum:
      maximum = 0
    if minimum != minimum:
      minimum = 0
      
    #divisor, translated here from the string specification to the numeric value
    if divisor not in ['minmax', 'std', 'mad']:
      print("Error: retn transform parameter 'divisor' only accepts entries of 'minmax' 'mad' or 'std'")
    if divisor == 'minmax':
      divisor = maxminusmin
    elif divisor == 'mad':
      divisor = mad
    else:
      divisor = std
      
    if divisor == 0 or divisor != divisor:
      divisor = 1
    
    #driftreport metric scalingapproach returned as 'retn' or 'mnmx' or 'mxmn'
    #where mnmx is for cases where all values in train set are positive
    #mxmn is for cases where all values in train set are negative
    
    if maximum >= 0 and minimum <= 0:
      
      mdf_train[DPrt_column] = (mdf_train[DPrt_column]) / \
                                    (divisor) * multiplier + offset
      
      mdf_test[DPrt_column] = (mdf_test[DPrt_column]) / \
                                    (divisor) * multiplier + offset
      
      scalingapproach = 'retn'
      
    elif maximum >= 0 and minimum >= 0:
    
      #perform min-max scaling to train and test sets using values from train
      mdf_train[DPrt_column] = (mdf_train[DPrt_column] - minimum) / \
                                    (divisor) * multiplier + offset

      mdf_test[DPrt_column] = (mdf_test[DPrt_column] - minimum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mnmx'
      
    elif maximum <= 0 and minimum <= 0:
    
      #perform max-min scaling to train and test sets using values from train
      mdf_train[DPrt_column] = (mdf_train[DPrt_column] - maximum) / \
                                    (divisor) * multiplier + offset

      mdf_test[DPrt_column] = (mdf_test[DPrt_column] - maximum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mxmn'
      
    #now apply noise injection (train data only)
    
    #first we'll derive our sampled noise for injection
    #samples are kept as arrays / a standalone series so they are applied by row position
    #(assigning a DataFrame to the column would align on index and return NaN for non-range indexes)
    nrows = mdf_train.shape[0]
    
    if noisedistribution == 'normal':
      sampled = np.random.normal(loc=mu, scale=sigma, size=(nrows))
    else:
      sampled = np.random.laplace(loc=mu, scale=sigma, size=(nrows))
      
    #binomial samples select which rows receive noise
    binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(nrows))
    
    noise = pd.Series(sampled * binomial_samples)
    
    #cap outliers, noise outside of +/- 0.5 is discarded
    noise = noise.where((noise >= -0.5) & (noise <= 0.5))
    
    #adjacent cell infill for the discarded samples
    noise = noise.ffill().bfill()
    
    #support array to signal sign of noise, False is neg, True is pos
    posnoise = (noise >= 0.).values
    noise = noise.values
    
    #for noise injection we'll first move data into range 0-1 and then revert after injection
    #multiplier and offset are undone first (in reverse order of their application)
    #and then the set is shifted by the scalingapproach specific constant
    if scalingapproach == 'retn':
      rangeshift = -(minimum / divisor)
    elif scalingapproach == 'mnmx':
      rangeshift = 0
    elif scalingapproach == 'mxmn':
      rangeshift = (maximum - minimum) / divisor
      
    unit = (mdf_train[DPrt_column] - offset) / multiplier + rangeshift
    
    #now inject noise, with scaled noise to maintain range 0-1
    #(so if value <0.5, and neg noise, we scale noise to maintain ratio as if value was 0.5, similarly for >0.5 and pos noise)
    #the <0.5 test is evaluated once on the pre-noise values so each row receives noise a single time
    below = (unit < 0.5).values
    
    injected = np.where(below, \
                        np.where(posnoise, noise, noise * unit / 0.5), \
                        np.where(posnoise, noise * (1 - unit) / 0.5, noise))
    
    unit = unit + injected
    
    #revert from range 0-1 to the scaled range
    mdf_train[DPrt_column] = ((unit - rangeshift) * multiplier + offset).values
    
    #for test data is just pass-through (of the scaled values)
    
    #create list of columns
    nmbrcolumns = [DPrt_column]
    
    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []
    
    #note that divisor is recorded as the derived numeric value
    #and cap / floor as the applied values when passed as True
    nmbrnormalization_dict = {DPrt_column : {'mu' : mu, \
                                             'sigma' : sigma, \
                                             'flip_prob' : flip_prob, \
                                             'noisedistribution' : noisedistribution, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'mad' : mad, \
                                             'scalingapproach' : scalingapproach, \
                                             'offset' : offset, \
                                             'multiplier': multiplier, \
                                             'cap' : cap, \
                                             'floor' : floor, \
                                             'divisor' : divisor, \
                                             'adjinfill' : adjinfill, \
                                            }}
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPrt', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def process_DPbn_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPbn_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is bnry encoded data (i.e. boolean integers in single column)
    #flips entries based on samples from a Bernoulli distribution with flip_prob 0.03 by default
    #the noise properties may be customized with parameter 'flip_prob'
    #note that the noise is only injected into the designated training data of mdf_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream application
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    #returns mdf_train, mdf_test with new column of name column + '_DPbn', and column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    flip_prob = params.get('flip_prob', 0.03)
      
    DPbn_column = column + '_DPbn'
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, DPbn_column, suffixoverlap_results)
      
    #first we'll derive our sampled noise for injection, 1 marks a row to be flipped
    #kept as an array so that the samples are applied by row position
    #(wrapping in a DataFrame would align on index and return NaN for non-range indexes)
    flips = np.random.binomial(n=1, p=flip_prob, size=(mdf_train.shape[0]))
    
    #now inject noise
    #abs(x - flip) returns x when flip is 0 and 1 - x when flip is 1
    mdf_train[DPbn_column] = abs(mdf_train[column] - flips)
    
    #for test data is just pass-through
    mdf_test[DPbn_column] = mdf_test[column]
    
    #create list of columns
    nmbrcolumns = [DPbn_column]

    nmbrnormalization_dict = {DPbn_column : {'flip_prob' : flip_prob}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPbn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def process_DPod_class(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPod_class(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is ordinal encoded data (i.e. categoric by integer in single column)
    #rows are selected for a flip by samples from a Bernoulli distribution with flip_prob 0.03 by default
    #the noise properties may be customized with parameter 'flip_prob'
    #when flip activated selects from the set of encodings per level random draw
    #(including potentially the current encoding for no flip)
    #note that the noise is only injected into the designated training data of mdf_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream application
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    #returns mdf_train, mdf_test with new column of name column + '_DPod', and column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    flip_prob = params.get('flip_prob', 0.03)
      
    DPod_column = column + '_DPod'
    DPod_tempcolumn1 = column + '_DPod_tmp1'
    DPod_tempcolumn2 = column + '_DPod_tmp2'
    
    #the _tmp1 / _tmp2 headers are no longer written to the dataframe (samples are held in local arrays)
    #they are still passed to the overlap check so the reported suffixoverlap_results are unchanged
    newcolumns = [DPod_column, DPod_tempcolumn1, DPod_tempcolumn2]
    
    suffixoverlap_results = \
    self.df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
    
    #we'll want to know the set of activations present in column, for automunge this is unique values
    ord_encodings = mdf_train[column].unique()
      
    #first we'll derive our sampled noise for injection
    #flips marks rows that receive a replacement, draws are the replacement encodings
    #kept as arrays so that the samples are applied by row position
    #(wrapping in a DataFrame would align on index and return NaN for non-range indexes)
    nrows = mdf_train.shape[0]
    flips = np.random.binomial(n=1, p=flip_prob, size=(nrows))
    draws = np.random.choice(ord_encodings, size=(nrows))
    
    #now inject noise
    
    #initialize return column
    mdf_train[DPod_column] = 0
    
    #this returns column value when flips is 0 or draws when flips is 1
    #applied in a single pass since the expression is the same for every encoding
    #rows with missing entries (not expected with clean data) retain the 0 initialization
    mdf_train[DPod_column] = \
    np.where(mdf_train[column].notna(), \
             mdf_train[column] * (1 - flips) + flips * draws, \
             mdf_train[DPod_column])
    
    #for test data is just pass-through
    mdf_test[DPod_column] = mdf_test[column]
    
    #create list of columns
    nmbrcolumns = [DPod_column]

    nmbrnormalization_dict = {DPod_column : {'flip_prob' : flip_prob}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPod', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list
  
  def process_null_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess transform applied to columns that returned a 'null' category
    #the received dataframe is returned without edits (deletion takes place elsewhere)
    #what is generated is a column_dict_list with a single entry keyed by column + '_null'
    #whose columnslist and categorylist are empty
    '''
    
    nullcolumn = column + '_null'
    
    #populate the data structure for the null column, nothing stored for normalization
    null_properties = {}
    null_properties['category'] = 'null'
    null_properties['origcategory'] = category
    null_properties['normalization_dict'] = {nullcolumn : {}}
    null_properties['origcolumn'] = column
    null_properties['inputcolumn'] = column
    null_properties['columnslist'] = []
    null_properties['categorylist'] = []
    null_properties['infillmodel'] = False
    null_properties['infillcomplete'] = False
    #no columns are created here so suffix overlap results are returned empty
    null_properties['suffixoverlap_results'] = {}
    null_properties['deletecolumn'] = False
    
    column_dict_list = [{nullcolumn : null_properties}]

    return df, column_dict_list
  
  def process_copy_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #copy function, singleprocess transform
    #returns the received column under a header with suffix appender
    #the appender defaults to '_copy', or can be specified with parameter 'suffix'
    #useful if want to apply same function more than once with different parameters
    #returns df and a column_dict_list with single entry for the copied column
    '''
    
    #suffix appender is either user specified or the default
    appender = params['suffix'] if 'suffix' in params else '_copy'
    copy_column = column + appender
    
    #the copy operation is handed off to support function df_copy_train
    #which also returns the suffix overlap results
    df, suffixoverlap_results = \
    self.df_copy_train(df, column, copy_column, {})
    
    #nothing needs to be stored for normalization
    copy_properties = {'category' : 'copy',
                       'origcategory' : category,
                       'normalization_dict' : {copy_column : {}},
                       'origcolumn' : column,
                       'inputcolumn' : column,
                       'columnslist' : [copy_column],
                       'categorylist' : [copy_column],
                       'infillmodel' : False,
                       'infillcomplete' : False,
                       'suffixoverlap_results' : suffixoverlap_results,
                       'deletecolumn' : False}
    
    column_dict_list = [{copy_column : copy_properties}]

    return df, column_dict_list

  def process_excl_class(self, df, column, category, postprocess_dict, params = {}):
    """
    #singleprocess transform for columns that returned a 'excl' category
    #the column is maintained as received, only with the suffix '_excl' added to the header
    #the excl transform is a very special exception, and this suffix is later
    #removed when automunge(*.)parameter excl_suffix passed as False
    
    #accepts parameter 'inplace', when passed as True the source column is renamed
    #otherwise (the default) the source column is copied to the new header
    
    #note that excl transform is also special in that it may only be applied
    #as a supplement primitive in a family tree (eg cousins)
    #as it replaces the source column internally (by a simple rename)
    
    #Note that the function check_transformdict(.) works 'under the hood'
    #to translate user passed excl transforms in family trees
    #from replacement primitives to corresponding supplement primitives
    
    #returns df and a column_dict_list with single entry for the returned column
    """
    
    exclcolumn = column + '_excl'
    
    inplace = params.get('inplace', False)
    
    suffixoverlap_results = {}

    if inplace is True:
      
      #inplace scenario, check the new header for overlap and then rename the source column
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, exclcolumn, suffixoverlap_results)
      
      df.rename(columns = {column : exclcolumn}, inplace = True)
      
    else:
      
      #default scenario, the copy is handed off to support function df_copy_train
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, exclcolumn, suffixoverlap_results)

    #note that the returned column is not relocated to the end of the dataframe
    #this was decided against to maximize efficiency
    #as there are some workflow scenarios with a lot of excl columns in postmunge
    
    #nothing needs to be stored for normalization
    excl_properties = {'category' : 'excl',
                       'origcategory' : category,
                       'normalization_dict' : {exclcolumn : {}},
                       'origcolumn' : column,
                       'inputcolumn' : column,
                       'columnslist' : [exclcolumn],
                       'categorylist' : [exclcolumn],
                       'infillmodel' : False,
                       'infillcomplete' : False,
                       'suffixoverlap_results' : suffixoverlap_results,
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : excl_properties}]

    return df, column_dict_list

  def process_exc2_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #pass-through of numeric data with infill, returned with suffix '_exc2' on the header
    #entries are converted to numeric, those that can't be converted are subject to infill
    #infill value is the mode of the train set (first entry if more than one mode)
    #or 0 when the train set has no numeric entries
    #the same infill value is applied to both train and test sets
    #accepts parameter 'inplace', when passed as True the source column is renamed
    #otherwise (the default) the source column is copied to the new header
    #returns mdf_train, mdf_test, and a column_dict_list
    #with the infill value stored in normalization_dict as 'fillvalue'
    '''
    
    exclcolumn = column + '_exc2'
    
    inplace = params.get('inplace', False)
    
    suffixoverlap_results = {}
    
    if inplace is True:
      
      #inplace scenario, check the new header for overlap and then rename in both sets
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, exclcolumn, suffixoverlap_results)
      
      for frame in (mdf_train, mdf_test):
        frame.rename(columns = {column : exclcolumn}, inplace = True)
      
    else:
      
      #default scenario, train set copy is handed off to support function df_copy_train
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, exclcolumn, suffixoverlap_results)
      
      mdf_test[exclcolumn] = mdf_test[column].copy()
    
    #convert all values to either numeric or NaN
    for frame in (mdf_train, mdf_test):
      frame[exclcolumn] = pd.to_numeric(frame[exclcolumn], errors='coerce')
    
    #infill value is based on mode of train set
    #the mean is only inspected when no mode was returned
    train_modes = mdf_train[exclcolumn].mode()
    if len(train_modes) >= 1:
      fillvalue = train_modes[0]
    else:
      fillvalue = mdf_train[exclcolumn].mean()
      
    #special case if column didn't have any numeric entries (x != x tests for NaN)
    if fillvalue != fillvalue:
      fillvalue = 0
    
    #replace missing data with fill value
    for frame in (mdf_train, mdf_test):
      frame[exclcolumn] = frame[exclcolumn].fillna(fillvalue)
    
    exc2_properties = {'category' : 'exc2',
                       'origcategory' : category,
                       'normalization_dict' : {exclcolumn : {'fillvalue' : fillvalue}},
                       'origcolumn' : column,
                       'inputcolumn' : column,
                       'columnslist' : [exclcolumn],
                       'categorylist' : [exclcolumn],
                       'infillmodel' : False,
                       'infillcomplete' : False,
                       'suffixoverlap_results' : suffixoverlap_results,
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : exc2_properties}]

    return mdf_train, mdf_test, column_dict_list
  
  def process_exc5_class(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #pass-through of integer data with infill, returned with suffix '_exc5' on the header
    #entries are converted to numeric, and those that can't be converted
    #or that are not equal to their rounded value (non integers) are subject to infill
    #infill value is the mode of the train set (first entry if more than one mode)
    #or 0 when the train set has no valid entries
    #the same infill value is applied to both train and test sets
    #accepts parameter 'inplace', when passed as True the source column is renamed
    #otherwise (the default) the source column is copied to the new header
    #returns mdf_train, mdf_test, and a column_dict_list
    #with the infill value stored in normalization_dict as 'fillvalue'
    '''
    
    exclcolumn = column + '_exc5'
    
    inplace = params.get('inplace', False)
    
    suffixoverlap_results = {}
    
    if inplace is True:
      
      #inplace scenario, check the new header for overlap and then rename in both sets
      suffixoverlap_results = \
      self.df_check_suffixoverlap(mdf_train, exclcolumn, suffixoverlap_results)
      
      for frame in (mdf_train, mdf_test):
        frame.rename(columns = {column : exclcolumn}, inplace = True)
      
    else:
      
      #default scenario, train set copy is handed off to support function df_copy_train
      mdf_train, suffixoverlap_results = \
      self.df_copy_train(mdf_train, column, exclcolumn, suffixoverlap_results)
      
      mdf_test[exclcolumn] = mdf_test[column].copy()
    
    for frame in (mdf_train, mdf_test):
      
      #convert all values to either numeric or NaN
      numeric_entries = pd.to_numeric(frame[exclcolumn], errors='coerce')
      
      #non integers are subject to infill
      frame[exclcolumn] = \
      np.where(numeric_entries == numeric_entries.round(), numeric_entries, np.nan)
    
    #infill value is based on mode of train set
    #the mean is only inspected when no mode was returned
    train_modes = mdf_train[exclcolumn].mode()
    if len(train_modes) >= 1:
      fillvalue = train_modes[0]
    else:
      fillvalue = mdf_train[exclcolumn].mean()
      
    #special case if column didn't have any valid entries (x != x tests for NaN)
    if fillvalue != fillvalue:
      fillvalue = 0
    
    #replace missing data with fill value
    for frame in (mdf_train, mdf_test):
      frame[exclcolumn] = frame[exclcolumn].fillna(fillvalue)
    
    exc5_properties = {'category' : 'exc5',
                       'origcategory' : category,
                       'normalization_dict' : {exclcolumn : {'fillvalue' : fillvalue}},
                       'origcolumn' : column,
                       'inputcolumn' : column,
                       'columnslist' : [exclcolumn],
                       'categorylist' : [exclcolumn],
                       'infillmodel' : False,
                       'infillcomplete' : False,
                       'suffixoverlap_results' : suffixoverlap_results,
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : exc5_properties}]

    return mdf_train, mdf_test, column_dict_list
  
  def process_shfl_class(self, df, column, category, postprocess_dict, params = {}):
    '''
    #function to shuffle data in a column
    #non-numeric entries allowed
    #for missing values, uses adjacent cell infill as default
    #
    #df: dataframe containing the source column
    #column: header of the source column
    #category: recorded as 'origcategory' in the returned column_dict
    #postprocess_dict: read for the 'randomseed' entry passed to the shuffle
    #params: optional dictionary, recognizes 'inplace' (defaults to False)
    #  when inplace is True the source column is renamed instead of copied
    #
    #returns the dataframe and a column_dict_list with a single entry
    #for the returned column column + '_shfl'
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    shflcolumn = column + '_shfl'
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self.df_copy_train(df, column, shflcolumn, suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self.df_check_suffixoverlap(df, shflcolumn, suffixoverlap_results)
      
      df.rename(columns = {column : shflcolumn}, inplace = True)
    
    #randomseed is accessible throughout in the postprocess_dict
    #(named to avoid shadowing the imported random module)
    randomseed = postprocess_dict['randomseed']
    
    #uses support function
    df = self.df_shuffle_series(df, shflcolumn, randomseed)
    
    #the adjacent cell infill is applied after the shuffle operation
    #ffill replaces NaN with value from the preceding row,
    #followed by a bfill just in case first row had a nan
    #(ffill / bfill methods used since fillna(method=...) is deprecated in pandas)
    df[shflcolumn] = df[shflcolumn].ffill().bfill()
    
    #create list of columns
    nmbrcolumns = [shflcolumn]
    
    #no parameters need to be carried to the postprocess function
    nmbrnormalization_dict = {shflcolumn : {}}
    
    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = \
    [{nc : {'category' : 'shfl', \
            'origcategory' : category, \
            'normalization_dict' : nmbrnormalization_dict, \
            'origcolumn' : column, \
            'inputcolumn' : column, \
            'columnslist' : nmbrcolumns, \
            'categorylist' : nmbrcolumns, \
            'infillmodel' : False, \
            'infillcomplete' : False, \
            'suffixoverlap_results' : suffixoverlap_results, \
            'deletecolumn' : False}} \
     for nc in nmbrcolumns]
        
    return df, column_dict_list

  def evalcategory(self, df_source, column, randomseed, eval_ratio, \
                   numbercategoryheuristic, powertransform, labels = False):
    '''
    #evalcategory(df_source, column, ...)
    #Function that takes as input a dataframe and associated column id,
    #evaluates the contents of a sample of cells and classifies the column
    #into a root category, returned as a string
    #
    #categories that may be returned by the evaluation:
    #'bnry' for columns with two or fewer unique values
    #'nmbr' for columns with numerical integer or float values (defaultnumerical)
    #'1010' for categoric columns (defaultcategorical)
    #'text' for integer / float columns with exactly three unique values
    #'ord3' for categoric columns with more than numbercategoryheuristic unique values
    #'ord5' for categoric columns where every sampled row has a unique value
    #'dat6' for columns where most entries parse as datetime (defaultdatetime)
    #'null' for columns where every sampled entry is NaN
    #'mnmx' / 'bxcx' may replace 'nmbr' when powertransform is True
    #
    #df_source: dataframe containing the column
    #column: header of the column to evaluate
    #randomseed: random_state for sampling the rows to evaluate
    #eval_ratio: share of rows to sample, only applied when 2,000+ rows
    #  values in (0,1] are used as a fraction, other values are divided by row count
    #numbercategoryheuristic: unique value count above which categoric sets
    #  are assigned ordinal instead of defaultcategorical
    #powertransform: 'excl' or 'exc2' are returned directly as the category,
    #  True activates the statistical evaluation of numerical sets
    #labels: when True the result is mapped to the label category equivalents
    #  ('lbnm', 'lb10', 'lbor', 'lbo5', 'lbte', 'lbbn', 'lbda')
    '''
    
    #we'll introduce convention of special values for powertransform to change default
    #we'll allow powertransform == 'excl' to signal that nonassigned columns should
    #be left untouched (a simpler version of existing functionality of assigning excl in assigncat)
    if powertransform == 'excl':
      category = 'excl'
      
    #or powertransform == 'exc2' for unprocessed but subject to force to numeric and modeinfill
    elif powertransform == 'exc2':
      category = 'exc2'
    
    else:
      
      #_____
      #a few default categories
      
      #default categorical
      #defaultcategorical = 'text'
      defaultcategorical = '1010'
      
      defaultordinal = 'ord3'

      defaultordinal_allunique = 'ord5'
      
      defaultnumerical = 'nmbr'
      #defaultnumerical = 'mean'
      
      defaultdatetime = 'dat6'
      
      #_____

      rowcount = df_source.shape[0]

      #we'll have convention that eval_ratio only applied for sets with >=2,000 rows
      if rowcount < 2000:
        eval_ratio = 1.
      else:
        #values in (0,1] are interpreted as a fraction of rows
        if eval_ratio > 0 and eval_ratio <= 1:
          eval_ratio = eval_ratio
        #other values are converted to a fraction by dividing by the row count
        else:
          eval_ratio = eval_ratio / rowcount
      
      #take a random sample of rows for evaluation based on eval_ratio heuristic
      df = pd.DataFrame(df_source[column]).sample(frac=eval_ratio, random_state=randomseed)

      #I couldn't find a good pandas tool for evaluating data class,
      #so will produce an array containing data types of each cell and
      #evaluate for most common type using the collections library

      type1_df = df[column].apply(lambda x: type(x)).values
      
      #mc2 holds the (up to) two most common cell types with their counts
      #mc holds just the most common
      #c = collections.Counter(type1_df)
      c = Counter(type1_df)
      mc2 = c.most_common(2)
      mc = [mc2[0]]
      
      #count number of unique values
      nunique = df[column].nunique()

      #check if nan present for cases where nunique == 3
      #NOTE(review): nanpresent is not read again within this function
      nanpresent = False
      if nunique == 3:
        for unique in df[column].unique():
          #(nan is the only value not equal to itself)
          if unique != unique:
            nanpresent = True

      #free memory (dtypes are memory hogs)
      del type1_df

      #additional array needed to check for time series
      #records the type returned by pd.to_datetime for each cell

      #df['typecolumn2'] = df[column].apply(lambda x: type(pd.to_datetime(x, errors = 'coerce')))
      type2_df = df[column].apply(lambda x: type(pd.to_datetime(x, errors = 'coerce'))).values

      #datec = collections.Counter(type2_df)
      datec = Counter(type2_df)
      datemc = datec.most_common(1)
      datemc2 = datec.most_common(2)

      #this is to address scenario where only one type so we can still call datemc2[1][0]
      if len(datemc2) == len(datemc):
        datemc2 = datemc + datemc

      #free memory (dtypes are memory hogs)
      del type2_df

      #an extension of this approach could be for those columns that produce a text
      #category to implement an additional test to determine the number of
      #common groupings / or the amount of uniquity. For example if every row has
      #a unique value then one-hot-encoding would not be appropriate. It would
      #probably be appropriate to either return an error message if this is found
      #or alternatively find a further way to automate this processing such as
      #look for contextual clues to groupings that can be inferred.

      #This is kind of hack to evaluate class by comparing these reference values
      #with the types stored in mc / mc2 using isinstance
      #NOTE(review): checkNAN is not read again within this function
      checkint = 1
      checkfloat = 1.1
      checkstring = 'string'
      checkNAN = np.nan

      #there's probably easier way to do this, here will create a reference value for date
      df_checkdate = pd.DataFrame([{'checkdate' : '7/4/2018'}])
      df_checkdate['checkdate'] = pd.to_datetime(df_checkdate['checkdate'], errors = 'coerce')

      #create dummy variable to store determined class (default is categoric class)
      #note that the evaluations below are applied in sequence
      #such that later matches overwrite earlier ones
      category = defaultcategorical

      #if most common in column is string and > two values, set category to categoric
      if isinstance(checkstring, mc[0][0]) and nunique > 2:
        category = defaultcategorical

      #if most common is date, set category to date
      if isinstance(df_checkdate['checkdate'][0], datemc[0][0]):
        category = defaultdatetime

      #pandas categorical dtype is treated as binary or categoric based on unique count
      if df[column].dtype.name == 'category':
        if nunique <= 2:
          category = 'bnry'
        else:
          category = defaultcategorical

      #if most common in column is integer and > two values, set category to number
      if isinstance(checkint, mc[0][0]) and nunique > 2:

        #(note the result of this check is overwritten by the if / else that follows)
        if df[column].dtype.name == 'category':
          if nunique <= 2:
            category = 'bnry'
          else:
            category = defaultcategorical

        #sets with three unique values are treated as 'text'
        #take account for numbercategoryheuristic
        #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
        #if nunique < numbercategoryheuristic:
        if nunique <= 3:
          if nunique == 3:
            category = 'text'
          else:
            category = 'bnry'

        else:
          category = defaultnumerical

      #if most common in column is float, set category to number
      if isinstance(checkfloat, mc[0][0]):

        #take account for numbercategoryheuristic
        #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic \
        #if nunique < numbercategoryheuristic \
        #or df[column].dtype.name == 'category':
        if df[column].dtype.name == 'category':
          if nunique <= 2:
            category = 'bnry'
          else:
            category = defaultcategorical

        elif nunique <= 3:
          if nunique == 3:
            category = 'text'
          elif nunique <= 2:
            category = 'bnry'

        else:
          category = defaultnumerical

      #if most common in column is integer and <= two values, set category to binary
      if isinstance(checkint, mc[0][0]) and nunique <= 2:
        category = 'bnry'

      #if most common in column is string and <= two values, set category to binary
      if isinstance(checkstring, mc[0][0]) and nunique <= 2:
        category = 'bnry'

      #if at least half of the entries are NaN, re-evaluate using the second most common type
      #(I suspect the below might be impacted if there are three dtypes instead of two,
      #in which case the 50% ratio rule may not be valid, that is kind of remote edge case)
      #elif df[column].isna().sum() >= df.shape[0] / 2:
      if len(mc2) > 1:
      
        if df[column].isna().sum() >= df.shape[0] / 2:


          #if 2nd most common in column is string and two values, set category to binary
          if isinstance(checkstring, mc2[1][0]) and nunique == 2:
            category = 'bnry'

          #if 2nd most common in column is string and > two values, set category to categoric
          if isinstance(checkstring, mc2[1][0]) and nunique > 2:
            category = defaultcategorical

          #if 2nd most common is date, set category to date   
          if isinstance(df_checkdate['checkdate'][0], datemc2[1][0]):
            category = defaultdatetime

          #if 2nd most common in column is integer and > two values, set category to number
          if isinstance(checkint, mc2[1][0]) and nunique > 2:

            #(note the result of this check is overwritten by the if / else that follows)
            if df[column].dtype.name == 'category':
              if nunique <= 2:
                category = 'bnry'
              else:
                category = defaultcategorical

            #take account for numbercategoryheuristic
            #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
            if nunique <= 3:

              if nunique == 3:
                category = defaultcategorical
              else:
                category = 'bnry'

            else:

              category = defaultnumerical

          #if 2nd most common in column is float, set category to number
          if isinstance(checkfloat, mc2[1][0]):

            #take account for numbercategoryheuristic
            #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
            #if df[column].nunique() < numbercategoryheuristic:
            #  category = 'text'
            #else:

            #(note the result of this check is overwritten by the if / else that follows)
            if df[column].dtype.name == 'category':
              if nunique <= 2:
                category = 'bnry'
              else:
                category = defaultcategorical

            if df[column].nunique() <= 3:

              if nunique == 3:
                category = 'text'
              else:
                category = 'bnry'

            else:

              category = defaultnumerical

          #if 2nd most common in column is integer and <= two values, set category to binary
          if isinstance(checkint, mc2[1][0]) and nunique <= 2:
            category = 'bnry'

          #if 2nd most common in column is string and <= two values, set category to binary
          if isinstance(checkstring, mc2[1][0]) and nunique <= 2:
            category = 'bnry'

      #columns where every sampled entry is NaN are assigned 'null'
      if df[column].isna().sum() == df.shape[0]:
        category = 'null'

      #categoric sets with high cardinality are assigned ordinal in place of defaultcategorical
      #with a distinct ordinal category when every sampled row has a unique value
      #if category == 'text':
      if category == defaultcategorical:
        if nunique > numbercategoryheuristic:
          category = defaultordinal
        if nunique == df.shape[0]:
          category = defaultordinal_allunique

      #new statistical tests for numerical sets from v2.25
      #I don't consider myself an expert here, these are kind of a placeholder while I conduct more research

      #default to 'nmbr' category instead of 'bxcx'
      #if category == 'bxcx' and powertransform is False:
      #  category = 'nmbr'

      if category in ['nmbr', 'bxcx', defaultnumerical] \
      and powertransform is True:
        
        #the tests are only applied when at least three unique numeric values are present
        if df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float).nunique() >= 3:

          #shapiro tests for normality, a common threshold is p<0.05 to reject the normality hypothesis
          stat, p = shapiro(df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float))
          #a typical threshold to test for normality is >0.05, let's try a lower bar for this application
          if p > 0.025:
            category = 'nmbr'
          if p <= 0.025:
            #skewness helps recognize exponential distributions, reference wikipedia:
            #A normal distribution and any other symmetric distribution with finite third moment has a skewness of 0
            #A half-normal distribution has a skewness just below 1
            #An exponential distribution has a skewness of 2
            #A lognormal distribution can have a skewness of any positive value, depending on its parameters
            #skewness = skew(df[column])
            skewness = skew(df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float))
            if skewness < 1.5:
              category = 'mnmx'
            else:
              #if powertransform is True:
              if category in ['nmbr', 'bxcx']:

                #note we'll only allow bxcx category if all values greater than a clip value
                #>0 (currently set at 0.1) since there is an asymptote for box-cox at 0
                if (df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float) >= 0.1).all():
                  category = 'bxcx'

                else:
                  category = 'nmbr'

              #NOTE(review): with defaultnumerical = 'nmbr' this branch appears unreachable,
              #it would only be reached if defaultnumerical were set to another category
              else:
                category = 'MAD3'

      del df
      
      #special cases for evaluation of labels column
      #(checks are applied in sequence, the label categories don't match later checks)
      if labels is True:

        #(defaultnumerical = 'nmbr')
        if category == defaultnumerical:
          category = 'lbnm'
          
        #(defaultcategorical = '1010')
        if category == defaultcategorical:
          category = 'lb10'
          
        #(defaultordinal = 'ord3')
        if category == defaultordinal:
          category = 'lbor'

        #(defaultordinal_allunique = 'ord5')
        if category == defaultordinal_allunique:
          category = 'lbo5'
          
        if category == 'text':
          category = 'lbte'
          
        if category == 'bnry':
          category = 'lbbn'
          
        #(defaultdatetime = 'dat6')
        if category == defaultdatetime:
          category = 'lbda'
    
    return category

  def getNArows(self, df2, column, category, postprocess_dict, \
                drift_dict = None, driftassess = False):
    '''
    #getNArows(df2, column, category, postprocess_dict)
    #function that when fed a dataframe, column id, and category label
    #outputs a single column dataframe composed of True and False
    #with the same number of rows as the input and header column + '_NArows',
    #where the True's correspond to those rows of the input column
    #that are treated as missing. This output can later be used to identify
    #which rows for a column to infill with ML derived plug data
    #
    #the basis for missing is the NArowtype entry registered for the category
    #in postprocess_dict['process_dict']:
    #justNaN            - NaN entries
    #numeric            - entries not castable to numeric
    #integer            - as numeric plus entries with non-integer values
    #positivenumeric    - as numeric plus entries <= 0
    #nonnegativenumeric - as numeric plus entries < 0
    #nonzeronumeric     - as numeric plus entries == 0
    #parsenumeric, parsenumeric_commas, parsenumeric_EU
    #                   - delegated to the support functions of same name
    #datetime           - entries not castable to datetime
    #exclude, boolexclude, totalexclude - no entries
    #an unrecognized NArowtype raises a ValueError
    #
    #note these evaluations are performed on the received dataframe
    #instead of a copy to reduce memory overhead, with the small tradeoff
    #that edits here (such as cast to numeric or datetime and replacement
    #of out of range entries with NaN) are preserved outside of this function
    #
    #also accepts a dictionary to store results of a drift assessment available
    #by passing driftassess = True, in which case the statistics for the
    #column are recorded in drift_dict[column] and the function returns
    #the tuple (NArows, drift_dict) instead of just NArows
    #if drift_dict is not passed a new dictionary is initialized for each call
    #
    #by default all NArowtypes recognize np.inf as NaN
    #(option activated external to this function)
    '''
    
    #a new dictionary is initialized per call since a mutable default
    #would be shared (and accumulate entries) between calls
    if drift_dict is None:
      drift_dict = {}
    
    NArowtype = postprocess_dict['process_dict'][category]['NArowtype']
    
    def _NArows_frame():
      #returns dataframe of True and False, where True corresponds to the NaN's
      #with column renamed to column + '_NArows'
      NArows = pd.DataFrame(pd.isna(df2[column]))
      return NArows.rename(columns = {column : column + '_NArows'})
    
    def _numeric_drift(extra_stats = None):
      #records distribution statistics of df2[column] in drift_dict
      #extra_stats are entries inserted prior to the closing nan_ratio entry
      series = df2[column]
      valid = series[series.notnull()].astype(float)
      
      #shapiro requires at least three data points
      #which is ensured by three unique valid entries
      if valid.nunique() > 2:
        W, p = shapiro(valid)
        skew_stat = skew(valid)
      else:
        W = np.nan
        p = np.nan
        skew_stat = np.nan
      
      stats_dict = {'max' : series.max(), \
                    'quantile_99' : series.quantile(0.99), \
                    'quantile_90' : series.quantile(0.90), \
                    'quantile_66' : series.quantile(0.66), \
                    'median' : series.median(), \
                    'quantile_33' : series.quantile(0.33), \
                    'quantile_10' : series.quantile(0.10), \
                    'quantile_01' : series.quantile(0.01), \
                    'min' : series.min(), \
                    'mean' : series.mean(), \
                    'std' : series.std(), \
                    #mean absolute deviation (Series.mad was removed from pandas)
                    'MAD' : (series - series.mean()).abs().mean(), \
                    'skew' : skew_stat, \
                    'shapiro_W' : W, \
                    'shapiro_p' : p}
      
      if extra_stats is not None:
        stats_dict.update(extra_stats)
      
      stats_dict.update({'nan_ratio' : pd.isna(series).sum() / series.shape[0]})
      
      drift_dict.update({column : stats_dict})
    
    if NArowtype == 'justNaN':
      
      if driftassess is True:
        
        nunique = df2[column].nunique()
        
        justNaN_stats = {'nunique' : nunique, \
                         'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}
        
        #this is to ensure postprocess_dict file size doesn't get out of control so 
        #only collect unique entries in source column drift stats
        #if number of unique is below a threshold (arbitrarily set to 500)
        if nunique < 500:
          justNaN_stats = {'unique' : df2[column].unique(), **justNaN_stats}
        
        drift_dict.update({column : justNaN_stats})
      
      NArows = _NArows_frame()
    
    elif NArowtype == 'numeric':
      
      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      
      if driftassess is True:
        _numeric_drift()
      
      NArows = _NArows_frame()
    
    elif NArowtype == 'integer':
      
      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      
      #non integers are subject to infill
      df2[column] = np.where(df2[column] == df2[column].round(), df2[column], np.nan)
      
      if driftassess is True:
        _numeric_drift()
      
      NArows = _NArows_frame()
    
    elif NArowtype in ['positivenumeric', 'nonnegativenumeric', 'nonzeronumeric']:
      
      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      
      #identify the out of range entries and the drift stat key for their ratio
      if NArowtype == 'positivenumeric':
        outofrange = df2[column] <= 0
        ratio_key = 'nonpositive_ratio'
      elif NArowtype == 'nonnegativenumeric':
        outofrange = df2[column] < 0
        ratio_key = 'negative_ratio'
      else:
        outofrange = df2[column] == 0
        ratio_key = 'zero_ratio'
      
      #ratio is based on count of out of range rows (as opposed to sum of their values)
      #and is evaluated prior to their replacement with NaN
      outofrange_ratio = outofrange.sum() / df2[column].shape[0]
      
      #out of range entries are subject to infill
      df2.loc[outofrange, column] = np.nan
      
      if driftassess is True:
        _numeric_drift({ratio_key : outofrange_ratio})
      
      NArows = _NArows_frame()
    
    elif NArowtype in ['parsenumeric', 'parsenumeric_commas', 'parsenumeric_EU']:
      
      if NArowtype == 'parsenumeric':
        NArows = self.parsenumeric(df2, column)
      elif NArowtype == 'parsenumeric_commas':
        NArows = self.parsenumeric_commas(df2, column)
      else:
        NArows = self.parsenumeric_EU(df2, column)
      
      #NOTE(review): unlike the other NArowtypes these drift stats are recorded
      #even when driftassess is not True, preserved in case callers rely on it
      drift_dict.update({column : {'nunique' : df2[column].nunique(), \
                                   'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
    
    elif NArowtype == 'datetime':
      
      #convert all values to either datetime or NaT
      df2[column] = pd.to_datetime(df2[column], errors = 'coerce')
      
      if driftassess is True:
        drift_dict.update({column : {'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
      
      NArows = _NArows_frame()
    
    elif NArowtype in ['exclude', 'boolexclude', 'totalexclude']:
      
      if driftassess is True:
        drift_dict.update({column : {}})
      
      #no entries are subject to infill
      NArows = pd.DataFrame(False, index = df2.index, columns = [column + '_NArows'])
    
    else:
      
      raise ValueError('getNArows received unrecognized NArowtype: ' + str(NArowtype))
    
    if driftassess is False:
      
      return NArows
    
    else:
      
      return NArows, drift_dict
  
  def parsedate(self, df, column):
    """
    #support function for NArows
    #casts the entries of df[column] to datetime (in the received dataframe)
    #and returns a single column dataframe with header column + '_NArows'
    #with boolean identification of entries that didn't register as datetime
    #wherein activations are False if a datetime is present and True if not
    """
    
    #entries that can't be parsed are coerced to NaT
    parsed = pd.to_datetime(df[column], errors = 'coerce')
    df[column] = parsed
    
    #mark the missing entries and apply the returned header
    return df[column].isna().to_frame(name = column + '_NArows')
  
  def is_number(self, s):
    """
    #support function for numeric parsing
    #returns True when s can be cast to a finite float, otherwise False
    #entries that cast to nan or +/- infinity are reported as False
    #(nan will be subject to infill)
    #partly inspired by stack overflow discussion 
    #https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float
    """

    try:
      value = float(s)
    except ValueError:
      #the entry could not be interpreted as a float
      return False

    #isfinite is False for both nan and infinity
    return bool(np.isfinite(value))

  def is_number_comma(self, s):
    """
    #support function for numeric parsing
    #removes every comma from string s and then returns True when the
    #remainder can be cast to a finite float, otherwise False
    #entries that cast to nan or +/- infinity are reported as False
    #(nan will be subject to infill)
    #partly inspired by stack overflow discussion 
    #https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float
    """

    without_commas = s.replace(',', '')

    try:
      value = float(without_commas)
    except ValueError:
      return False

    #nan is the only float that does not equal itself
    if value != value or np.isinf(value):
      return False

    return True
    
  def is_number_EU(self, s):
    """
    #support function for numeric parsing
    #tests if a string s is numeric after converting from international
    #conventions to US, which is performed by stripping out spaces and periods
    #and replacing commas with periods
    #where the conversion is only applied to the interior characters,
    #the first and last character are passed to the float cast unchanged
    #this is relavent for cases where working with international data on US OS
    #I expect if working on international OS a different convention may be appropriate
    #based on what is recognized as a float
    
    #returns True when the converted string casts to a finite float, otherwise False
    #an empty string returns False
    """

    if len(s) > 1:
      #strips out spaces and periods other than first and last character, 
      #replaces commas with periods
      converted = s[0] + s[1:-1].replace(' ','').replace('.','').replace(',','.') + s[-1]
    else:
      #strings shorter than two characters have no interior to convert
      #(indexing first and last character of an empty string would raise IndexError
      #and for a single character would duplicate that character)
      converted = s

    try:
      value = float(converted)
    except ValueError:
      return False

    if value == value and not np.isinf(value):
      return True
    else:
      #(nan will be subject to infill)
      return False
    
  def parsenumeric(self, df, column):
    """
    #support function for process_nmrc and NArows
    #parses string entries and returns a column with boolean identification
    #for entries that include numeric string portions
    #wherein activations are 0 if a number is present and 1 if not
    #treats numeric entries as number as well
    """
    
    #first we find overlaps from mdf_train
    
    unique_list = list(df[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength, 0, -1))

    overlap_dict = {}
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:
        
        if unique not in overlap_dict:

          len_unique = len(unique)

          if len_unique >= overlap_length:
            
            if overlap_length > 1:

              nbr_iterations = len_unique - overlap_length

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number(extract):
        
                    overlap_dict.update({unique : False})
                
            #else if overlap_length == 1    
            else:
              
              nbr_iterations = len_unique - overlap_length
              
              in_dict = False

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number(extract):

                    in_dict = True

                    overlap_dict.update({unique : False})
                  
              if in_dict is False:

                overlap_dict.update({unique : True})
    
    NArows = pd.DataFrame(df[column].copy())

    NArows[column] = NArows[column].astype(str)
    NArows[column] = NArows[column].replace(overlap_dict)
#     df[column] = df[column].astype(np.int8)
    
    NArows.columns = [column+'_NArows']
    
    return NArows
  
  def parsenumeric_commas(self, df, column):
    """
    #support function for process_nmrc and NArows
    #parses string entries and returns a column with boolean identification
    #for entries that include numeric string portions (after stripping commas)
    #wherein activations are 0 if a number is present and 1 if not
    #treats numeric entries as number as well
    """
    
    #first we find overlaps from mdf_train
    
    unique_list = list(df[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength, 0, -1))

    overlap_dict = {}
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:
        
        if unique not in overlap_dict:

          len_unique = len(unique)

          if len_unique >= overlap_length:
            
            if overlap_length > 1:

              nbr_iterations = len_unique - overlap_length

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number_comma(extract):

                    overlap_dict.update({unique : False})
                
            #else if overlap_length == 1    
            else:
              
              nbr_iterations = len_unique - overlap_length
              
              in_dict = False

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number_comma(extract):

                    in_dict = True

                    overlap_dict.update({unique : False})
                  
              if in_dict is False:

                overlap_dict.update({unique : True})
    
    NArows = pd.DataFrame(df[column].copy())

    NArows[column] = NArows[column].astype(str)
    NArows[column] = NArows[column].replace(overlap_dict)
#     df[column] = df[column].astype(np.int8)
    
    NArows.columns = [column+'_NArows']
    
    return NArows
  
  def parsenumeric_EU(self, df, column):
    """
    #support function for process_nmEU and NArows
    #parses string entries and returns a column with boolean identification
    #for entries that include numeric string portions (after converting from international format)
    #wherein activations are 0 if a number is present and 1 if not
    #treats numeric entries as number as well
    """
    
    #first we find overlaps from mdf_train
    
    unique_list = list(df[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength, 0, -1))

    overlap_dict = {}
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:
        
        if unique not in overlap_dict:

          len_unique = len(unique)

          if len_unique >= overlap_length:
            
            if overlap_length > 1:

              nbr_iterations = len_unique - overlap_length

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number_EU(extract):

                    overlap_dict.update({unique : False})
                
            #else if overlap_length == 1    
            else:
              
              nbr_iterations = len_unique - overlap_length
              
              in_dict = False

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self.is_number_EU(extract):

                    in_dict = True

                    overlap_dict.update({unique : False})
                  
              if in_dict is False:

                overlap_dict.update({unique : True})
    
    NArows = pd.DataFrame(df[column].copy())

    NArows[column] = NArows[column].astype(str)
    NArows[column] = NArows[column].replace(overlap_dict)
#     df[column] = df[column].astype(np.int8)
    
    NArows.columns = [column+'_NArows']
    
    return NArows

  def populateMLinfilldefaults(self, randomseed):
    '''
    assembles the dictionary of default parameters for ML infill models,
    with one entry keyed 'RandomForestClassifier' and one keyed 'RandomForestRegressor'
    (values intended to match ScikitLearn defaults)
  
    the received randomseed is populated as 'random_state' in both entries
  
    note that n_estimators set at 100 (default for version 0.22)
    '''
  
    def forest_defaults(criterion):
      #parameters populated for both entries, only the criterion value differs
      return {'n_estimators' : 100, \
              'criterion' : criterion, \
              'max_depth' : None, \
              'min_samples_split' : 2, \
              'min_samples_leaf' : 1, \
              'min_weight_fraction_leaf' : 0.0, \
              'max_features' : 'auto', \
              'max_leaf_nodes' : None, \
              'min_impurity_decrease' : 0.0, \
              'min_impurity_split' : None, \
              'bootstrap' : True, \
              'oob_score' : False, \
              'n_jobs' : None, \
              'random_state' : randomseed, \
              'verbose' : 0, \
              'warm_start' : False}
  
    #class_weight is only populated for the classifier entry
    classifier_defaults = forest_defaults('gini')
    classifier_defaults['class_weight'] = None
  
    regressor_defaults = forest_defaults('mse')

    return {'RandomForestClassifier' : classifier_defaults, \
            'RandomForestRegressor' : regressor_defaults}

  def initRandomForestClassifier(self, ML_cmnd, MLinfilldefaults):
    '''
    initializes and returns a RandomForestClassifier model
    
    each model parameter is taken from the user passed
    ML_cmnd['MLinfill_cmnd']['RandomForestClassifier'] when specified there,
    otherwise from MLinfilldefaults['RandomForestClassifier']
    
    note that ML_cmnd is edited in place to populate the 'MLinfill_cmnd'
    and 'RandomForestClassifier' entries with empty dictionaries when not already present
    '''
    
    algorithm = 'RandomForestClassifier'
    
    #populate ML_cmnd if stuff not already present
    user_params = ML_cmnd.setdefault('MLinfill_cmnd', {}).setdefault(algorithm, {})
    
    #the parameters passed to the model initialization
    parameter_names = ['n_estimators', 'criterion', 'max_depth', 'min_samples_split', \
                       'min_samples_leaf', 'min_weight_fraction_leaf', 'max_features', \
                       'max_leaf_nodes', 'min_impurity_decrease', 'min_impurity_split', \
                       'bootstrap', 'oob_score', 'n_jobs', 'random_state', 'verbose', \
                       'warm_start', 'class_weight']
    
    model_params = {}
    
    for parameter in parameter_names:
      
      if parameter in user_params:
        #user specification takes precedence
        model_params[parameter] = user_params[parameter]
        
      else:
        #defaults are only accessed for parameters the user did not specify
        model_params[parameter] = MLinfilldefaults[algorithm][parameter]

    #then initialize RandomForestClassifier model
    model = RandomForestClassifier(**model_params)

    return model

  def initRandomForestRegressor(self, ML_cmnd, MLinfilldefaults):
    '''
    function that assigns appropriate parameters based on defaults and user inputs
    and then initializes a RandomForestRegressor model
    
    each model parameter is taken from the user passed
    ML_cmnd['MLinfill_cmnd']['RandomForestRegressor'] when specified there,
    otherwise from MLinfilldefaults['RandomForestRegressor']
    (including n_jobs, whose default was previously read from the
    'RandomForestClassifier' entry of MLinfilldefaults)
    
    note that ML_cmnd is edited in place to populate the 'MLinfill_cmnd'
    and 'RandomForestRegressor' entries with empty dictionaries when not already present
    
    returns the initialized model
    '''
    
    #populate ML_cmnd if stuff not already present
    if 'MLinfill_cmnd' not in ML_cmnd:
      ML_cmnd.update({'MLinfill_cmnd':{}})
    if 'RandomForestRegressor' not in ML_cmnd['MLinfill_cmnd']:
      ML_cmnd['MLinfill_cmnd'].update({'RandomForestRegressor':{}})
      
    user_params = ML_cmnd['MLinfill_cmnd']['RandomForestRegressor']
    
    #the parameters passed to the model initialization
    parameter_names = ['n_estimators', 'criterion', 'max_depth', 'min_samples_split', \
                       'min_samples_leaf', 'min_weight_fraction_leaf', 'max_features', \
                       'max_leaf_nodes', 'min_impurity_decrease', 'min_impurity_split', \
                       'bootstrap', 'oob_score', 'n_jobs', 'random_state', 'verbose', \
                       'warm_start']
    
    model_params = {}
    
    for parameter in parameter_names:
      
      if parameter in user_params:
        #user specification takes precedence
        model_params[parameter] = user_params[parameter]
        
      else:
        #every default is read from the regressor entry
        #defaults are only accessed for parameters the user did not specify
        model_params[parameter] = MLinfilldefaults['RandomForestRegressor'][parameter]

    #then initialize RandomForestRegressor model 
    model = RandomForestRegressor(**model_params)

    return model
  
  def inspect_ML_cmnd(self, ML_cmnd, autoML_type, MLinfill_alg):
    """
    #Inspects the parameters passed in ML_cmnd['MLinfill_cmnd'][MLinfill_alg]
    #to determine if any of them were passed as a set of candidate values
    #instead of a distinct value, where a set is recognized as an entry whose
    #type is list, range, or the type returned from scipy stats.expon(1)
    
    #takes as input user-passed ML_cmnd, returns tune_marker
    #where tune_marker = True indicates sets were passed, else False
    
    #autoML_type refers to type of predictive algorithm applied,
    #the inspection is only performed for 'randomforest',
    #any other value returns False
    
    #MLinfill_alg refers to the target algorithm for passed parameters
    #such as 'RandomForestRegressor' or 'RandomForestClassifier'
    """
    
    #nothing to inspect for other algorithm types
    if autoML_type != 'randomforest':
      return False
    
    #nothing to inspect if user did not pass parameters for this algorithm
    if 'MLinfill_cmnd' not in ML_cmnd:
      return False
    
    if MLinfill_alg not in ML_cmnd['MLinfill_cmnd']:
      return False
    
    passed_params = ML_cmnd['MLinfill_cmnd'][MLinfill_alg]
    
    for key in passed_params:
      
      #exact type match, subclasses of these types are not treated as sets
      if type(passed_params[key]) in [list, range, type(stats.expon(1))]:
        
        #a single parameter passed as a set is sufficient
        return True
    
    return False
  
  def assemble_param_sets(self, ML_cmnd, autoML_type, MLinfill_alg):
    """
    #sorts the parameters passed in ML_cmnd['MLinfill_cmnd'][MLinfill_alg]
    #into two dictionaries for hyperparameter tuning operation
    
    #parameters passed as a set of candidate values, recognized as an entry whose
    #type is list, range, or the type returned from scipy stats.expon(1),
    #are saved in tune_params dictionary
    
    #parameters that were otherwise passed
    #are saved in static_params dictionary
    
    #returns those two dictionaries as static_params, tune_params
    #both are returned empty when no parameters were passed for MLinfill_alg
    
    #autoML_type refers to type of predictive algorithm applied,
    #parameters are only sorted for 'randomforest',
    #any other value returns two empty dictionaries
    
    #MLinfill_alg refers to the target algorithm for passed parameters
    #such as 'RandomForestRegressor' or 'RandomForestClassifier'
    """
    
    #initialize returned dictionaries
    static_params, tune_params = {}, {}
    
    if autoML_type == 'randomforest' \
    and 'MLinfill_cmnd' in ML_cmnd \
    and MLinfill_alg in ML_cmnd['MLinfill_cmnd']:
      
      passed_params = ML_cmnd['MLinfill_cmnd'][MLinfill_alg]
      
      for key in passed_params:
        
        entry = passed_params[key]
        
        #exact type match, subclasses of these types are not treated as sets
        if type(entry) in [list, range, type(stats.expon(1))]:
          #sets will be targeted for tuning
          destination = tune_params
        else:
          #distinct values will overwrite defaults
          destination = static_params
        
        destination[key] = entry
        
    return static_params, tune_params

  def predictinfill(self, category, df_train_filltrain, df_train_filllabel, \
                    df_train_fillfeatures, df_test_fillfeatures, randomseed, \
                    postprocess_dict, ML_cmnd, autoMLer, printstatus, categorylist = []):
    '''
    #predictinfill(category, df_train_filltrain, df_train_filllabel, \
    #df_train_fillfeatures, df_test_fillfeatures, randomseed, categorylist), \
    #function that takes as input \
    #a category string, the output of createMLinfillsets(.), a seed for randomness \
    #and a list of columns produced by a text class preprocessor when applicable and 
    #returns predicted infills for the train and test feature sets as df_traininfill, \
    #df_testinfill based on derivations using scikit-learn, with the lenth of \
    #infill consistent with the number of True values from NArows, and the trained \
    #model
    #accepts autoMLer populated with architecture options which is applied based on entries to ML_cmnd
    '''

    #if autoML_type not specified than we'll apply default (randomforest)
    #note this is only a temporary update to ML_cmnd and is not returned from function call
    if 'autoML_type' not in ML_cmnd:
      ML_cmnd.update({'autoML_type' : 'randomforest'})
    
    #grab autoML_type from ML_cmnd, this will be one of our keys for autoMLer dictionary
    autoML_type = ML_cmnd['autoML_type']
  
    #MLinfilltype distinguishes between classifier/regressor, single/multi column, ordinal/onehot/binary, etc
    #see potential values documented in assembleprocessdict function
    MLinfilltype = postprocess_dict['process_dict'][category]['MLinfilltype']
    
    #if a numeric target set
    if MLinfilltype in ['numeric', 'concurrent_nmbr']:
      
      #edge case if training data has zero rows (such as if column was all NaN) 
      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False
      
      else:
        
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'regression'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          df_traininfill = np.array([0])

        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          df_testinfill = np.array([0])

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = ['infill'])
      df_testinfill = pd.DataFrame(df_testinfill, columns = ['infill'])
      
    #if target is categoric, such as ordinal or boolean integers
    if MLinfilltype in ['singlct', 'binary', 'concurrent_act']:
      
      #edge case if training data has zero rows (such as if column was all NaN) 
      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:
        
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        if MLinfilltype == 'singlct':
          ML_application = 'ordinalclassification'
        else:
          ML_application = 'booleanclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          df_traininfill = np.array([0])

        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          df_testinfill = np.array([0])

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = ['infill'])
      df_testinfill = pd.DataFrame(df_testinfill, columns = ['infill'])
      
    #if target is multi column categoric (onehot encoded) / (binary encoded handled seperately)
    if MLinfilltype in ['multirt']:

      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:
        
        #future extension - Label Smoothing for ML infill
        #(might incorporate this into the training function to be activated by ML_cmnd)
          
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'onehotclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          #this needs to have same number of columns as text category
          df_traininfill = np.zeros(shape=(1,len(categorylist)))
        
        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          #this needs to have same number of columns as text category
          df_testinfill = np.zeros(shape=(1,len(categorylist)))
          
      #convert infill values to dataframe (this column labeleling also works for single column case)
      df_traininfill = pd.DataFrame(df_traininfill, columns = categorylist)
      df_testinfill = pd.DataFrame(df_testinfill, columns = categorylist)
    
    #if target is a binary encoded categoric set
    if MLinfilltype in ['1010']:
      
      if df_train_filltrain.shape[0] == 0:

        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:

        #convert from binary to one-hot encoding
        df_train_filllabel = \
        self.convert_1010_to_onehot(df_train_filllabel)
          
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'onehotclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus)

        #this is to support 1010 infill predictions in postmunge
        for entry in categorylist:
          postprocess_dict['column_dict'][entry].update({'_1010_categorylist_proxy_for_postmunge_MLinfill' : list(range(df_train_filllabel.shape[1]))})
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, list(range(df_train_filllabel.shape[1])))

          df_traininfill = \
          self.convert_onehot_to_1010(df_traininfill)

        else:
          #this needs to have same number of columns as text category
          df_traininfill = np.zeros(shape=(1,len(categorylist)))
        
        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, list(range(df_train_filllabel.shape[1])))

          df_testinfill = \
          self.convert_onehot_to_1010(df_testinfill)

        else:
          #this needs to have same number of columns as text category
          df_testinfill = np.zeros(shape=(1,len(categorylist)))

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = categorylist)
      df_testinfill = pd.DataFrame(df_testinfill, columns = categorylist)
      
    #if target category excluded from ML infill:
    if MLinfilltype in ['exclude', 'boolexclude', 'totalexclude']:

      #create empty sets for now
      #an extension of this method would be to implement a comparable infill \
      #method for the time category, based on the columns output from the \
      #preprocessing
      df_traininfill = pd.DataFrame({'infill' : [0]}) 
      df_testinfill = pd.DataFrame({'infill' : [0]}) 

      model = False
    
    return df_traininfill, df_testinfill, model, postprocess_dict

  def createMLinfillsets(self, df_train, df_test, column, trainNArows, testNArows, \
                         category, randomseed, postprocess_dict, columnslist = [], \
                         categorylist = []):
    '''
    #Partitions df_train and df_test into the sets used to train an ML infill
    #model for a target column and to run inference on the rows needing infill.
    #
    #parameters:
    #df_train, df_test: dataframes of train and test data
    #column: header of the target column, serves as the label in the
    #  single column case
    #trainNArows, testNArows: single column dataframes of booleans in which
    #  True marks a row of the corresponding set that needs infill
    #category: key into postprocess_dict['process_dict'] from which the
    #  MLinfilltype is read
    #randomseed: not used by this function (kept for signature compatibility)
    #postprocess_dict: dictionary containing the 'process_dict' entry
    #columnslist: headers that are dropped from all of the feature sets
    #categorylist: headers serving as labels in the multi column case
    #
    #returns the dataframes:
    #df_train_filltrain: features of train rows not needing infill
    #df_train_filllabel: labels of train rows not needing infill
    #df_train_fillfeatures: features of train rows needing infill
    #df_test_fillfeatures: features of test rows needing infill
    #
    #for any MLinfilltype not supported by ML infill the four returned
    #dataframes are empty placeholders with a single 'foo' column
    #
    #note that the received df_train and df_test are not altered, the
    #concatinations below create new dataframes
    '''
    
    MLinfilltype = postprocess_dict['process_dict'][category]['MLinfilltype']
    
    if MLinfilltype not in ['numeric', 'singlct', 'binary', \
                            'multirt', '1010', \
                            'concurrent_act', 'concurrent_nmbr']:

      #create empty sets for now
      #an extension of this method would be to implement a comparable method \
      #for the time category, based on the columns output from the preprocessing
      df_train_filltrain = pd.DataFrame({'foo' : []}) 
      df_train_filllabel = pd.DataFrame({'foo' : []})
      df_train_fillfeatures = pd.DataFrame({'foo' : []})
      df_test_fillfeatures = pd.DataFrame({'foo' : []})

      return df_train_filltrain, df_train_filllabel, df_train_fillfeatures, df_test_fillfeatures

    #the single column and multi column cases only differ in the label columns:
    #a single column set or a concurrent type is labeled by the target column,
    #otherwise the full categorylist serves as the labels
    if len(categorylist) == 1 or \
    MLinfilltype in ['concurrent_act', 'concurrent_nmbr']:
      labelcolumns = [column]
    else:
      labelcolumns = list(categorylist)

    #headers of the True/False infill markers
    trainNAcolumn = trainNArows.columns[0]
    testNAcolumn = testNArows.columns[0]

    #concatinate the NArows True/False designations to df_train & df_test
    df_train = pd.concat([df_train, trainNArows], axis=1)
    df_test = pd.concat([df_test, testNArows], axis=1)

    #row selectors
    train_rows_complete = df_train[trainNAcolumn] == False
    train_rows_infill = df_train[trainNAcolumn]
    test_rows_infill = df_test[testNAcolumn]

    #training features: rows not needing infill, 
    #without columnslist and without the NArows marker
    df_train_filltrain = df_train[train_rows_complete]
    df_train_filltrain = df_train_filltrain.drop(columnslist, axis=1)
    df_train_filltrain = df_train_filltrain.drop([trainNAcolumn], axis=1)

    #training labels: label columns for the rows not needing infill
    df_train_filllabel = df_train.loc[train_rows_complete, labelcolumns]

    #train features for the rows needing infill
    df_train_fillfeatures = df_train[train_rows_infill]
    df_train_fillfeatures = df_train_fillfeatures.drop(columnslist, axis=1)
    df_train_fillfeatures = df_train_fillfeatures.drop([trainNAcolumn], axis=1)

    #test features for the rows needing infill
    df_test_fillfeatures = df_test[test_rows_infill]
    df_test_fillfeatures = df_test_fillfeatures.drop(columnslist, axis=1)
    df_test_fillfeatures = df_test_fillfeatures.drop([testNAcolumn], axis=1)
    
    return df_train_filltrain, df_train_filllabel, df_train_fillfeatures, df_test_fillfeatures

  def insertinfill(self, df, column, infill, category, NArows, postprocess_dict, \
                   columnslist = [], categorylist = [], singlecolumncase = False):
    '''
    #uses the boolean indicators for presence of infill in NArows to apply the
    #passed infill dataframe to df
    #
    #parameters:
    #df: dataframe receiving infill (target columns are overwritten in place)
    #column: header of the target column for the single column case
    #infill: dataframe of infill values ordered consistently with the rows
    #  marked True in NArows. In the multi column case it is accessed by the
    #  headers of categorylist
    #category: key into postprocess_dict['process_dict'] from which the
    #  MLinfilltype is read
    #NArows: single column dataframe of booleans, True marks rows to infill
    #  (this dataframe is not altered)
    #postprocess_dict: dictionary containing the 'process_dict' entry
    #columnslist: not used by this function (kept for signature compatibility)
    #categorylist: headers of the columns receiving infill in the multi column case
    #singlecolumncase: special case (used in adjinfill) to override the 
    #  categorylist > 1 method and only apply infill to column
    #
    #returns df
    #
    #for MLinfilltype outside of those supported (such as 'exclude', 
    #'boolexclude', 'totalexclude') df is returned unaltered
    #
    #note that in the multi column case each column of categorylist receives
    #the infill from the infill column of the same header
    '''
    
    MLinfilltype = postprocess_dict['process_dict'][category]['MLinfilltype']
    
    if MLinfilltype not in ['numeric', 'singlct', 'binary', \
                            'multirt', '1010', \
                            'concurrent_act', 'concurrent_nmbr']:
      return df

    #NArows column name uses original column name + _NArows as key
    NArowcolumn = NArows.columns[0]
    needsinfill = NArows[NArowcolumn]

    #assemble pairs of (target column, infill values for that column)
    if len(categorylist) == 1 or singlecolumncase is True \
    or MLinfilltype in ['concurrent_act', 'concurrent_nmbr']:

      infillvalues = np.asarray(infill.values)

      #a single column dataframe yields an array of shape (n, 1), take the 
      #column so that the inserted values are scalars instead of 1-entry arrays
      if infillvalues.ndim > 1:
        infillvalues = infillvalues[:, 0]

      targets = [(column, infillvalues)]

    else:

      targets = [(textcolumnname, infill[textcolumnname].values) \
                 for textcolumnname in categorylist]

    #index values of df aligned to the rows of NArows, held in a local series
    #so that the received NArows dataframe is not mutated
    indexvalues = pd.Series(df.index, index=NArows.index)

    #index values coresponding to the NArows True values
    infillindex = indexvalues.loc[needsinfill]

    for targetcolumn, infillvalues in targets:

      #create a dictionary for use to insert infill using df's index as the key
      infill_dict = dict(zip(infillindex, infillvalues))

      #infill values are placed in rows coresponding to NArows True values
      #and rows coresponding to NArows False values are filled with a 0
      stagedinfill = np.where(needsinfill, indexvalues.replace(infill_dict), 0)

      #now carry that infill over to the target column for rows where NArows is True
      df[targetcolumn] = np.where(needsinfill, stagedinfill, df[targetcolumn])

    return df

  def MLinfillfunction (self, df_train, df_test, column, postprocess_dict, \
                        masterNArows_train, masterNArows_test, randomseed, \
                        ML_cmnd, printstatus):
    '''
    #Applies ML infill to a target column by chaining the functions
    #createMLinfillsets, predictinfill, and insertinfill
    #
    #only runs when postprocess_dict['column_dict'][column]['infillcomplete'] 
    #is False, otherwise the received sets are returned without changes
    #
    #the trained model (or False when no model was trained) is recorded in
    #postprocess_dict['column_dict'] under 'infillmodel', and 'infillcomplete'
    #is marked True for the target column (concurrent MLinfilltypes) or for 
    #each column of the categorylist (other MLinfilltypes)
    #
    #the NArows markers are read from masterNArows_train / masterNArows_test
    #under the key origcolumn + '_NArows' and are passed to the functions as 
    #single column dataframes
    #
    #after infill the data types of the target columns are reset to the data
    #types they had in df_train prior to infill, for both df_train and df_test
    #
    #returns df_train, df_test, postprocess_dict
    '''
    
    #infill already completed for this column, nothing to do
    if postprocess_dict['column_dict'][column]['infillcomplete'] is not False:
      return df_train, df_test, postprocess_dict

    column_entry = postprocess_dict['column_dict'][column]
    columnslist = column_entry['columnslist']
    categorylist = column_entry['categorylist']
    origcolumn = column_entry['origcolumn']
    category = column_entry['category']
    autoMLer = postprocess_dict['autoMLer']

    NArows_header = origcolumn + '_NArows'

    #concurrent MLinfilltypes are treated one column at a time
    concurrent = \
    postprocess_dict['process_dict'][category]['MLinfilltype'] \
    in ['concurrent_act', 'concurrent_nmbr']

    #columns whose data types will be restored after infill
    if len(categorylist) == 1 or concurrent:
      dtype_columns = [column]
    elif len(categorylist) > 1:
      dtype_columns = categorylist
    else:
      dtype_columns = []

    #record the datatypes to ensure returned set is consistent
    original_dtypes = {}
    for dtype_column in dtype_columns:
      original_dtypes[dtype_column] = df_train[dtype_column].dtypes

    #assemble the training / inference sets
    df_train_filltrain, df_train_filllabel, df_train_fillfeatures, df_test_fillfeatures = \
    self.createMLinfillsets(df_train, df_test, column, \
                            pd.DataFrame(masterNArows_train[NArows_header]), \
                            pd.DataFrame(masterNArows_test[NArows_header]), \
                            category, randomseed, postprocess_dict, \
                            columnslist = columnslist, \
                            categorylist = categorylist)

    #train the model and predict infill values
    df_traininfill, df_testinfill, model, postprocess_dict = \
    self.predictinfill(category, df_train_filltrain, df_train_filllabel, \
                       df_train_fillfeatures, df_test_fillfeatures, randomseed, \
                       postprocess_dict, ML_cmnd, autoMLer, printstatus, \
                       categorylist = categorylist)

    #column_dict is accessed from the postprocess_dict returned by predictinfill
    column_dict = postprocess_dict['column_dict']

    #record the trained model for the target column
    column_dict[column]['infillmodel'] = model

    #infill is only inserted when a model was trained
    if model is not False:

      df_train = self.insertinfill(df_train, column, df_traininfill, category, \
                                   pd.DataFrame(masterNArows_train[NArows_header]), \
                                   postprocess_dict, columnslist = columnslist, \
                                   categorylist = categorylist)

      #test set infill is only inserted when the train set had rows needing infill
      train_had_infill = \
      any(marker == True for marker in masterNArows_train[NArows_header])

      if train_had_infill:

        df_test = self.insertinfill(df_test, column, df_testinfill, category, \
                                    pd.DataFrame(masterNArows_test[NArows_header]), \
                                    postprocess_dict, columnslist = columnslist, \
                                    categorylist = categorylist)

    #mark infill as complete and record the model, for concurrent types
    #only for the target column, otherwise for each column of categorylist
    if concurrent:
      completed_columns = [column]
    else:
      completed_columns = categorylist

    for completed_column in completed_columns:
      column_dict[completed_column]['infillcomplete'] = True
      column_dict[completed_column]['infillmodel'] = model

    #reset data type to ensure returned data is consistent with what was passed
    for dtype_column in dtype_columns:
      restored_dtype = original_dtypes[dtype_column]
      df_train[dtype_column] = df_train[dtype_column].astype(restored_dtype)
      df_test[dtype_column] = df_test[dtype_column].astype(restored_dtype)

    return df_train, df_test, postprocess_dict

  def assemble_autoMLer(self):
    """
    #Assembles the "autoMLer" data structure used to look up the training and
    #inference functions applied for ML infill
    
    #the structure is a nested dictionary accessed as
    #autoMLer[platform][application][action]
    #where platform is one of 'randomforest', 'autogluon'
    #application is one of 'booleanclassification', 'ordinalclassification', 
    #'onehotclassification', 'regression'
    #and action is one of 'train', 'predict'
    
    #train functions are called as:
    #model = train_function(ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus)
    
    #predict functions are called as:
    #infill = predict_function(ML_cmnd, model, fillfeatures, printstatus, categorylist)
    
    #for randomforest the classification applications share the classifier
    #functions and regression has its own regressor functions
    #for autogluon all applications share the same train / predict functions
    """
    
    classification_applications = ['booleanclassification', \
                                   'ordinalclassification', \
                                   'onehotclassification']
    
    #random forest entries
    randomforest_entries = {}
    for application in classification_applications:
      randomforest_entries[application] = {'train'   : self.train_randomforest_classifier, \
                                           'predict' : self.predict_randomforest_classifier}
    randomforest_entries['regression'] = {'train'   : self.train_randomforest_regressor, \
                                          'predict' : self.predict_randomforest_regressor}
    
    #autogluon entries
    autogluon_entries = {}
    for application in classification_applications + ['regression']:
      autogluon_entries[application] = {'train'   : self.train_autogluon, \
                                        'predict' : self.predict_autogluon}
    
    autoMLer = {'randomforest' : randomforest_entries, \
                'autogluon'    : autogluon_entries}
    
    return autoMLer

  def train_randomforest_classifier(self, ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus):
    """
    #performs tuning if appropriate based on ML_cmnd
    #initializes model
    #trains model
    #returns the trained model
    
    #uses scikit-learn random forest models
    
    #parameters:
    #ML_cmnd: dictionary of ML infill options, must contain 'autoML_type',
    #  may contain 'hyperparam_tuner' and 'randomCV_n_iter'. Note that defaults 
    #  for 'hyperparam_tuner' / 'randomCV_n_iter' are written into the received 
    #  ML_cmnd when not specified
    #df_train_filltrain: dataframe of training features
    #df_train_filllabel: dataframe of training labels
    #randomseed: passed to populateMLinfilldefaults
    #printstatus: when True the tuned parameters are printed
    
    #where tuning is activated by passing parameters to the model as lists or distributions instead of distinct values
    #and uses grid search or random search based on ML_cmnd
    #see also ML_cmnd documentation in read me
    
    #determination of whether parameters passed as targets for tuning is by inspect_ML_cmnd function
    #and if tuning applied aggregation of entries distinguishing tuning targets use assemble_param_sets function
    
    #when tuning is applied hyperparam_tuner needs to be one of {'gridCV', 'randomCV'},
    #any other value raises a ValueError
    #randomCV_n_iter can be passed as an integer when hyperparam_tuner passed as randomCV
    
    #model initialization makes use of initRandomForestClassifier function
    #and default values for Random Forest Classifer are initialized with populateMLinfilldefaults
    """
    
    df_train_filltrain = df_train_filltrain.values
    df_train_filllabel = df_train_filllabel.values
    
    #sometimes may be one column, then need ravel flattening
    #(ndim is checked first so that already flattened labels are accepted)
    if df_train_filllabel.ndim == 2 and df_train_filllabel.shape[1] == 1:
      df_train_filllabel = np.ravel(df_train_filllabel)
    
    #initialize defaults dictionary, these are the default parameters for random forest model initialization
    MLinfilldefaults = \
    self.populateMLinfilldefaults(randomseed)
    
    #ML_cmnd accepts specification for type of tuner when hyperparaemter tuning applied, else defaults to gridCV
    if 'hyperparam_tuner' in ML_cmnd:
      MLinfill_tuner = ML_cmnd['hyperparam_tuner']
    else:
      ML_cmnd.update({'hyperparam_tuner' : 'gridCV'})
      MLinfill_tuner = 'gridCV'
      
    #if randomCV tuner applied, number of iterations accepted as randomCV_n_iter, else defaults to 100
    if MLinfill_tuner == 'randomCV':
      if 'randomCV_n_iter' not in ML_cmnd:
        ML_cmnd.update({'randomCV_n_iter' : 100})
      randomCV_n_iter = ML_cmnd['randomCV_n_iter']
    
    autoML_type = ML_cmnd['autoML_type']
    MLinfill_alg = 'RandomForestClassifier'
    
    #tune marker tells us if user passed some parameters as a list for hyperparameter tuning
    tune_marker = self.inspect_ML_cmnd(ML_cmnd, autoML_type, MLinfill_alg)
    
    if tune_marker is True:
    
      #static_params are user passed parameters that won't be tuned, 
      #tune_params are user passed params (passed as list or range) that will be tuned
      static_params, tune_params = self.assemble_param_sets(ML_cmnd, autoML_type, MLinfill_alg)
    
      #we'll create a temp ML_cmnd to initialize a tuning model
      temp_ML_cmnd = {'MLinfill_cmnd':{MLinfill_alg : static_params}}
      
      #then we'll initialize a tuning model
      #note that this populates the parameters to be tuned with defaults
      tuning_model = self.initRandomForestClassifier(temp_ML_cmnd, MLinfilldefaults)
    
      #for now we'll default to grid scoring of 'accuracy'
      grid_scoring = 'accuracy'
      
      #now we'll initialize a search
      #note that the iid parameter is not passed, it was deprecated and since
      #removed from scikit-learn so passing it raises a TypeError in current versions
      if MLinfill_tuner == 'gridCV':
        tune_search = GridSearchCV(tuning_model, cv=5, \
                                   param_grid = tune_params, scoring = grid_scoring)
      elif MLinfill_tuner == 'randomCV':
        tune_search = RandomizedSearchCV(tuning_model, cv=5, \
                                         param_distributions = tune_params, scoring = grid_scoring, \
                                         n_iter = randomCV_n_iter)
      else:
        #an unsupported tuner can't proceed since there is no search to fit
        raise ValueError("error: hyperparam_tuner currently only supports 'gridCV' or 'randomCV'.")
      
      #now we'll run a fit on the search
      tune_search.fit(df_train_filltrain, df_train_filllabel)
    
      #acess the tuned parameters based on the tuning operation
      tuned_params = tune_search.best_params_    

      if printstatus is True:

        print("tuned parameters:")
        print(tuned_params)
        print("")

      #now assemble final static params by incorporating the tuned params
      static_params.update(tuned_params)

      #now initialize our tuned model
      #first create another temp_ML_cmnd for the tuned set
      temp_ML_cmnd_two = {'MLinfill_cmnd':{MLinfill_alg : static_params}}

      model = self.initRandomForestClassifier(temp_ML_cmnd_two, MLinfilldefaults)

    else:
      
      model = self.initRandomForestClassifier(ML_cmnd, MLinfilldefaults)

    model.fit(df_train_filltrain, df_train_filllabel)

    return model

  def predict_randomforest_classifier(self, ML_cmnd, model, fillfeatures, printstatus, categorylist=[]):
    """
    #inference operation for a random forest classifier
    #such as a model returned from train_randomforest_classifier
    
    #passes the values of the fillfeatures dataframe to the predict method
    #of the model and returns the resulting infill predictions
    
    #ML_cmnd, printstatus, and categorylist are not inspected here,
    #they are accepted for a signature consistent with predict_autogluon
    #(the categorylist parameter handles an edge case of predict_autogluon)
    """
    
    #the model receives the features as an array
    feature_array = fillfeatures.values
    
    return model.predict(feature_array)

  def train_randomforest_regressor(self, ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus):
    """
    #trains a scikit-learn random forest regressor for ML infill
    #performs hyperparameter tuning first if appropriate based on ML_cmnd
    
    #tuning is activated by passing parameters to the model as lists or distributions
    #instead of distinct values, as detected by inspect_ML_cmnd function
    #in which case assemble_param_sets function separates the static parameters
    #from the tuning targets
    #see also ML_cmnd documentation in read me
    
    #in addition to parameters passed to model
    #ML_cmnd also accepts arguments for hyperparam_tuner and randomCV_n_iter
    #where hyperparam_tuner can be one of {False, 'gridCV', 'randomCV'}, defaulting to 'gridCV'
    #and randomCV_n_iter can be passed as an integer when hyperparam_tuner passed as randomCV
    #(defaulting to 100)
    #note that these two defaults are recorded into the received ML_cmnd dictionary
    #any hyperparam_tuner other than 'randomCV' results in a grid search,
    #with an error message printed when the value was not a recognized one
    
    #model initialization makes use of initRandomForestRegressor function
    #and default values for Random Forest Regressor are initialized with populateMLinfilldefaults
    
    #accepts:
    #ML_cmnd - dictionary, needs to include an 'autoML_type' entry
    #df_train_filltrain - dataframe of training features
    #df_train_filllabel - dataframe of the label column
    #randomseed - passed to populateMLinfilldefaults
    #printstatus - when True the tuned parameters are printed
    
    #returns the trained model
    """
    
    df_train_filltrain = df_train_filltrain.values
    
    #single label column needs to be flattened from [[#,...]] to [#,...] with ravel
    df_train_filllabel = np.ravel(df_train_filllabel.values)
    
    #initialize defaults dictionary, these are the default parameters for random forest model initialization
    MLinfilldefaults = \
    self.populateMLinfilldefaults(randomseed)
    
    #ML_cmnd accepts specification for type of tuner when hyperparameter tuning applied, else defaults to gridCV
    if 'hyperparam_tuner' in ML_cmnd:
      MLinfill_tuner = ML_cmnd['hyperparam_tuner']
    else:
      ML_cmnd.update({'hyperparam_tuner' : 'gridCV'})
      MLinfill_tuner = 'gridCV'
      
    #if randomCV tuner applied, number of iterations accepted as randomCV_n_iter, else defaults to 100
    if MLinfill_tuner == 'randomCV':
      if 'randomCV_n_iter' not in ML_cmnd:
        ML_cmnd.update({'randomCV_n_iter' : 100})
      randomCV_n_iter = ML_cmnd['randomCV_n_iter']
    
    autoML_type = ML_cmnd['autoML_type']
    MLinfill_alg = 'RandomForestRegressor'
    
    #tune marker tells us if user passed some parameters as a list for hyperparameter tuning
    tune_marker = self.inspect_ML_cmnd(ML_cmnd, autoML_type, MLinfill_alg)
    
    if tune_marker is True:
    
      #static_params are user passed parameters that won't be tuned, 
      #tune_params are user passed params (passed as list or range) that will be tuned
      static_params, tune_params = self.assemble_param_sets(ML_cmnd, autoML_type, MLinfill_alg)
    
      #we'll create a temp ML_cmnd to initialize a tuning model
      temp_ML_cmnd = {'MLinfill_cmnd':{MLinfill_alg : static_params}}
      
      #then we'll initialize a tuning model
      #note that this populates the parameters to be tuned with defaults
      #which the search then overrides with candidates from tune_params
      tuning_model = self.initRandomForestRegressor(temp_ML_cmnd, MLinfilldefaults)
    
      #for now we'll default to grid scoring of 'neg_mean_squared_error'
      grid_scoring = 'neg_mean_squared_error'
      
      #now we'll initialize the search
      #note the iid argument is intentionally not passed, it was deprecated and then
      #removed from scikit-learn so passing it raises a TypeError in current releases
      if MLinfill_tuner == 'randomCV':
        tune_search = RandomizedSearchCV(tuning_model, cv=5, \
                                         param_distributions = tune_params, scoring = grid_scoring, \
                                         n_iter = randomCV_n_iter)
      else:
        if MLinfill_tuner != 'gridCV' and MLinfill_tuner is not False:
          #unrecognized tuner, report and fall back to the gridCV default
          #(previously this case left tune_search undefined for the fit call below)
          print("error: hyperparam_tuner currently only supports 'gridCV' or 'randomCV'.")
        tune_search = GridSearchCV(tuning_model, cv=5, \
                                   param_grid = tune_params, scoring = grid_scoring)
      
      #now we'll run a fit on the search
      #for now won't pass any fit parameters
      fit_params = {}
      tune_search.fit(df_train_filltrain, df_train_filllabel, **fit_params)    
    
      #access the tuned parameters based on the tuning operation
      tuned_params = tune_search.best_params_    

      if printstatus is True:

        print("tuned parameters:")
        print(tuned_params)
        print("")

      #now assemble final static params by incorporating the tuned params
      static_params.update(tuned_params)

      #now initialize our tuned model
      #first create another temp_ML_cmnd for the tuned set
      temp_ML_cmnd_two = {'MLinfill_cmnd':{MLinfill_alg : static_params}}

      model = self.initRandomForestRegressor(temp_ML_cmnd_two, MLinfilldefaults)

    else:
      
      #no tuning targets, initialize directly from the received ML_cmnd
      model = self.initRandomForestRegressor(ML_cmnd, MLinfilldefaults)

    model.fit(df_train_filltrain, df_train_filllabel)

    return model

  def predict_randomforest_regressor(self, ML_cmnd, model, fillfeatures, printstatus, categorylist=[]):
    """
    #inference counterpart to train_randomforest_regressor
    #converts the received feature dataframe to its underlying array
    #and returns the predictions of the passed model as infill

    #ML_cmnd, printstatus, and categorylist are not inspected here
    #they are accepted so the signature is consistent with predict_autogluon
    #(which does make use of categorylist)
    """

    return model.predict(fillfeatures.values)

  def train_autogluon(self, ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus):
    """
    #Trains a model for ML infill using AutoGluon library
    #relies on AutoGluon being installed, as the function imports
    
    from autogluon.tabular import TabularPrediction as task
    
    #currently applies default parameters to training operation, extended parameter support pending
    
    #same function used for both classification and regression relying on AutoGluon to infer label type
    
    #multi column label sets are consolidated to a single column 
    #by way of convert_onehot_to_singlecolumn
    
    #returns the trained model, or False when the training operation raised a ValueError
    """

    from autogluon.tabular import TabularPrediction as task

    try:
      
      #rebuild both sets from their arrays, which resets the headers to integers
      #(column headers matter for the convert_onehot_to_singlecolumn method)
      features = pd.DataFrame(df_train_filltrain.values)
      targets = pd.DataFrame(df_train_filllabel.values)

      #feature headers are given a prefix, this way they won't overlap with the label header
      features.columns = ['train_' + str(header) for header in list(features.columns)]

      #anything other than a single label column gets consolidated to one column
      if len(list(targets.columns)) != 1:
        targets = self.convert_onehot_to_singlecolumn(targets)

      ag_label_column = list(targets.columns)[0]

      #autogluon accepts labels as part of training set
      #so combine and then apply the autogluon data set loader
      ag_trainset = task.Dataset(pd.concat([features, targets], axis=1))

      #train the model
      return task.fit(train_data=ag_trainset, label=ag_label_column)
        
    except ValueError:
      return False

  def predict_autogluon(self, ML_cmnd, model, fillfeatures, printstatus, categorylist=[]):
    """
    #runs an inference operation
    #on corresponding model trained in train_autogluon
    #returns infill predictions

    #when categorylist has more than one entry the single column predictions
    #are translated to a onehot array by way of convert_singlecolumn_to_onehot
    #note that in some cases the passed categorylist may be a proxy list of equivalent length
    #such as a range of integers

    #when model was received as False (the train_autogluon return for an unsuccessful training)
    #or when inference raises a ValueError, an array of zeros is returned as a plug
    """

    from autogluon.tabular import TabularPrediction as task

    if model is False:
      #NOTE(review): this plug has a single row regardless of the row count of fillfeatures,
      #confirm that is what callers expect
      return np.zeros(shape=(1,len(categorylist)))

    #rebuild from the array and apply headers consistent with the train_autogluon convention
    fillfeatures = pd.DataFrame(fillfeatures.values)
    fillfeatures.columns = ['train_' + str(header) for header in list(fillfeatures.columns)]

    #load dataset
    fillfeatures = task.Dataset(fillfeatures)

    try:

      infill = model.predict(fillfeatures)

      if len(categorylist) > 1:
        infill = self.convert_singlecolumn_to_onehot(infill, categorylist)

    except ValueError:

      return np.zeros(shape=(fillfeatures.shape[0],len(categorylist)))

    return infill

  def convert_onehot_to_singlecolumn(self, df):
    """
    #support function for autoML libraries that don't accept multicolumn labels
    #converts onehot encoded sets to single column
    #with entries corresponding to the header of the column with an activation (entry == 1)
    #if a row has more than one activation the last such column is recorded
    #for cases where a row did not have an activation (such as all zeros)
    #we'll populate with -1
    #which since these are derived from a numpy set won't overlap with headers
    
    #accepts a dataframe df of onehot encoded columns
    #returns a single column dataframe with header 'labels' and string entries,
    #sharing the index of df
    
    #the received dataframe is not altered
    #(a prior version added a scratch column with header -1 to the caller's dataframe)
    """
    
    #assemble the encoding in a stand alone array instead of a column of df
    labels = np.full(df.shape[0], -1)
    
    for column in df.columns:
      #a -1 header is reserved for the missing entry marker and is not treated as a label
      if column != -1:
        labels = np.where(df[column]==1, column, labels)
    
    df2 = pd.DataFrame({'labels' : pd.Series(labels, index=df.index)})
    
    df2['labels'] = df2['labels'].astype(str)
        
    return df2

  def convert_singlecolumn_to_onehot(self, df, columnslist):
    """
    #support function for autoML libraries that don't accept multicolumn labels
    #converts single column encoded sets back to onehot
    #where the received entries are integers (or their string representations)
    #identifying the position of the activated column
    #such as the form returned from convert_onehot_to_singlecolumn
    #entries outside of the range of column positions (such as the -1 missing marker)
    #are returned as a row of all zeros
    
    #accepts df as a single column set (array, series, or dataframe, first column is used)
    #and columnslist, where only the length of columnslist is inspected
    
    #returns a numpy array of integers with one row per entry and len(columnslist) columns
    
    #the conversion is based on row position instead of header / index labels
    #so a named series or a non-default index is supported
    """
    
    #np.asarray strips any header / index, astype(int) also converts string entries
    labels = np.asarray(pd.DataFrame(df).iloc[:, 0]).astype(int)
    
    positions = np.arange(len(columnslist))
    
    #broadcast comparison of each label against each column position
    onehot = (labels[:, np.newaxis] == positions).astype(int)
    
    return onehot

  def convert_1010_to_onehot(self, df_array):  
    """
    takes as input dataframe encoded in 1010 format
    and translates to a one-hot encoding equivalent
    with number of columns based on 2^n where n is number of 1010 columns
    and potentially with columns with all 0
    
    the one-hot columns are ordered by the binary encoding they represent
    (headers are '-1' followed by the zero padded binary string),
    the translation itself is performed by postprocess_textsupport_class
    
    the received dataframe is not altered
    (a prior version added a '-1' scratch column to the caller's dataframe,
    such that a repeated call with the same dataframe saw an extra column)
    """

    received_column_count = df_array.shape[1]

    #initialize a stand alone series to store encodings
    #the '-1' header applied below relies on convention that received columns 
    #with suffix appenders have '_' included to ensure no overlap
    encodings = pd.Series('', index=df_array.index)

    #populate the encodings by concatenating the digits of each column as strings
    for column in df_array.columns:
      if column != '-1':
        encodings = \
        encodings + df_array[column].astype(int).astype(str)

    #the other columns are not carried forward
    df_encodings = pd.DataFrame({'-1' : encodings})

    #create list of columns for the encoding with binary encodings
    #this will be full list of range of values based on number of 1010 columns
    #postprocess_textsupport_class support function needs string headers
    textcolumns = list(range(2**received_column_count))
    textcolumns = ['-1' + str(format(item, f"0{received_column_count}b")) for item in textcolumns]

    df_onehot = \
    self.postprocess_textsupport_class(df_encodings, '-1', {}, 'tempkey', {'textcolumns':textcolumns})

    del df_onehot['-1']

    return df_onehot
  
  def convert_onehot_to_1010(self, np_onehot):
    """
    takes as input numpy array encoded in one-hot format
    and translates to a 1010 encoding equivalent
    based on assumption that order of columns consistent per 
    convention of convert_1010_to_onehot(.)
    i.e. the one-hot column at position i represents the binary encoding of integer i
    
    returns a numpy array of dtype int8 with one row per received row
    and ceil(log2(number of one-hot columns)) columns, most significant digit first
    
    a row without any activation (all zeros) is returned as all zeros
    which corresponds to the default infill encoding for '1010'
    if a row has more than one nonzero entry the last such column is the one encoded
    
    the translation is performed on integer positions instead of string headers
    (a prior version stored encodings in a dataframe column with header '1010',
    which overlapped with the header of the binary encoding '1010'
    for the case of 16 one-hot columns such that the activation was lost)
    """
    
    onehot = np.asarray(np_onehot)
    onehot_column_count = onehot.shape[1]

    #get number of 1010 columns
    nbrcolumns = int(np.ceil(np.log2(onehot_column_count)))

    #any nonzero entry is treated as an activation
    activations = onehot != 0

    #position of the last activated column in each row
    #(argmax on the reversed columns finds the first activation counting from the right)
    last_activation = onehot_column_count - 1 - np.argmax(activations[:, ::-1], axis=1)

    #rows without activation default to position 0, which is the all zeros infill encoding
    encoding_position = np.where(activations.any(axis=1), last_activation, 0)

    #extract the binary digits of each position, most significant digit first
    bit_shifts = np.arange(nbrcolumns - 1, -1, -1)
    np_1010 = ((encoding_position[:, np.newaxis] >> bit_shifts) & 1).astype(np.int8)

    return np_1010

  def LabelSetGenerator(self, df, column, label):
    '''
    #LabelSetGenerator
    #takes as input a dataframe, a column header, and a label value
    #returns the subset of rows of the dataframe 
    #for which the entry in that column equals the label
    '''
    
    label_rows = df[column] == label

    return df[label_rows]

  def LabelFrequencyLevelizer(self, train_df, labels_df, \
                                postprocess_dict, process_dict, LabelSmoothing):
    """
    #LabelFrequencyLevelizer(.)
    #takes as input dataframes for train set and labels,
    #along with postprocess_dict, process_dict, and the LabelSmoothing setting
    #combines train and labels to single df, then creates sets for each label category
    #such as to add on multiples of each set to achieve near levelized
    #frequency of label occurence in training set (increases the size
    #of the training set by redundant inclusion of rows with lower frequency
    #labels.) Returns train_df, labels_df.
    #the number of additional copies for a label is round(max count / label count) - 1
    #note that appended rows retain their original index values (index is not reset)
    #MLinfilltypes of the label category handled here are singlct, binary, numeric, multirt
    #for now have convention that MLinfilltypes of 1010 or concurrent_act
    #not yet supported (future extension), those sets are returned unaltered
    """
    
    #default basis for the multirt scenario is all of the label columns
    columns_labels = list(labels_df)
    
    #find origcateogry of labels from column_dict based on first label column
    #(note this indexing takes place prior to the check for empty labels below)
    labelcolumnkey = list(labels_df)[0]
    origcolumn = postprocess_dict['column_dict'][labelcolumnkey]['origcolumn']
    origcategory = postprocess_dict['column_dict'][labelcolumnkey]['origcategory']

    #find labelctgy from process_dict based on this origcategory
    labelscategory = process_dict[origcategory]['labelctgy']
    
    #the MLinfilltype of the labelctgy determines which levelizing method is applied
    MLinfilltype = postprocess_dict['process_dict'][labelscategory]['MLinfilltype']
    
    labels = list(labels_df)
    
    if labels != []:

      #setlengthlist stores row counts per label, multiplierlist stores number of copies to append
      #(setnameslist is initialized but not otherwise used below)
      setnameslist = []
      setlengthlist = []
      multiplierlist = []

      #scenario for labels in a single column, levelized based on that column's unique values
      if MLinfilltype in ['singlct', 'binary']:
        
        singlctcolumn = False
        
        #identify the target column, which when there are multiple label columns
        #is the one whose category matches the labelctgy
        if len(labels) == 1:
          singlctcolumn = labels[0]
        else:
          for labelcolumn in labels:
            if postprocess_dict['column_dict'][labelcolumn]['category'] == labelscategory:
              singlctcolumn = labelcolumn
          #if the label category is custom processdict entry with improperly specced labelctgy just apply this heuristic (remote edge case)
          if singlctcolumn is False:
            print("label category processdict entry contained a labelctgy not found in the transformdict entry, applying heuristic")
            print()
            singlctcolumn = labels[0]
        
        uniquevalues = list(labels_df[singlctcolumn].unique())

        #first pass: count the rows for each unique label value
        for label in uniquevalues:
          
          #derive set of labels dataframe for counting length
          df = self.LabelSetGenerator(labels_df, singlctcolumn, label)

          #append length onto list
          setlength = df.shape[0]
          setlengthlist.append(setlength)

        #length of biggest label set
        maxlength = max(setlengthlist)
        #set counter to 0
        i = 0
        #second pass: derive the number of copies to append for each label value
        for label in uniquevalues:
          #derive multiplier to levelize label frequency
          setlength = setlengthlist[i]
          if setlength > 0:
            
            #minus one since the set is already included once
            labelmultiplier = int(round(maxlength / setlength)) - 1
          else:
            labelmultiplier = 0
          #append multiplier onto list
          multiplierlist.append(labelmultiplier)
          #increment counter
          i+=1

        #concatinate labels onto train set
        train_df = pd.concat([train_df, labels_df], axis=1)

        #reset counter
        i=0
        #third pass: append the copies for each label value
        for label in uniquevalues:

          #create train subset corresponding to label
          #(since train_df is reassigned in the while loop below, for labels after the first
          #this subset is drawn from a train_df that includes the previously appended copies)
          df = self.LabelSetGenerator(train_df, singlctcolumn, label)

          #set j counter to 0
          j = 0
          #concatinate an additional copy of the label set multiplier times
          while j < multiplierlist[i]:
            train_df = pd.concat([train_df, df], axis=0)
            j+=1
            
          i+=1

        #now seperate the labels df from the train df
        labels_df = pd.DataFrame(train_df[labels].copy())
        #now delete the labels column from train set
        for labelcolumn in labels:
          del train_df[labelcolumn]

      #scenario for numeric labels, which are only levelized when the label set 
      #includes columns of aggregated bins, those columns then serve as basis
      if MLinfilltype in ['numeric']:

        columns_labels = []
        for label in labels_df.columns:
          #here we're checking if the column is a numneric set aggregated bins
          if postprocess_dict['process_dict'][postprocess_dict['column_dict'][label]['category']]['MLinfilltype'] \
          in ['multirt', 'concurrent_act']:
          
            columns_labels.append(label)
            
      #scenario for labels in multiple columns, levelized based on count of activations in each column
      #(for numeric this is skipped when no bins columns were found above)
      if MLinfilltype in ['numeric', 'multirt']:
        if columns_labels != []:
          
          #note for label smoothing activation values won't be 1
          #LabelSmoothing outside of the range 0-1 exclusive or passed as False 
          #is treated as an activation value of 1
          level_activation = LabelSmoothing
          if level_activation <= 0.0 \
          or level_activation >= 1.0 \
          or str(level_activation) == 'False':
            level_activation = 1
          
          i=0
          #first pass: count the rows with activation for each column
          for label in columns_labels:
            
            column = columns_labels[i]
            #derive set of labels dataframe for counting length
            df = self.LabelSetGenerator(labels_df, column, level_activation)
            
            #append length onto list
            setlength = df.shape[0]
            
            setlengthlist.append(setlength)

            i+=1

          #length of biggest label set
          maxlength = max(setlengthlist)

          #set counter to 0
          i = 0
          #second pass: derive the number of copies to append for each column
          for label in columns_labels:

            #derive multiplier to levelize label frequency
            setlength = setlengthlist[i]
            if setlength > 0:
              #minus one since the set is already included once
              labelmultiplier = int(round(maxlength / setlength)) - 1
            else:
              labelmultiplier = 0
            #append multiplier onto list
            multiplierlist.append(labelmultiplier)
            #increment counter
            i+=1

          #concatinate labels onto train set
          train_df = pd.concat([train_df, labels_df], axis=1)

          #reset counter
          i=0
          
          #third pass: append the copies for each column
          for label in columns_labels:

            #create train subset corresponding to label
            column = columns_labels[i]
            df = self.LabelSetGenerator(train_df, column, level_activation)

            #set j counter to 0
            j = 0
            #concatinate an additional copy of the label set multiplier times
            while j < multiplierlist[i]:
              train_df = pd.concat([train_df, df], axis=0)
              j+=1

            i+=1

          #restore the full list of label columns for the separation
          #(as columns_labels may have been a subset for the numeric scenario)
          columns_labels = list(labels_df)

          #now seperate the labels df from the train df
          labels_df = train_df[columns_labels]
          #now delete the labels column from train set
          train_df = train_df.drop(columns_labels, axis=1)

    return train_df, labels_df
  
  def trainFSmodel(self, am_subset, am_labels, randomseed, \
                   process_dict, postprocess_dict, labelctgy, ML_cmnd, printstatus):
    """
    #trains a model for feature importance evaluation
    #by way of the predictinfill function, with am_subset as features and am_labels as labels
    #returns the trained model, or False when am_labels has no columns
    
    #predictinfill also returns infill sets, which aren't needed here,
    #so the first row of am_subset is passed as a plug for the sets to run inference on
    """
    
    label_columns = list(am_labels)
    
    #without labels there is nothing to train
    if len(label_columns) == 0:
      return False

    df_train_fillfeatures_plug = pd.DataFrame(am_subset[:][:1].copy())
    df_test_fillfeatures_plug = pd.DataFrame(am_subset[:][:1].copy())
    
    #categorylist is accessed based on the first label column
    categorylist = postprocess_dict['column_dict'][label_columns[0]]['categorylist']

    #only the model from the returned tuple is of interest
    returned_sets = \
    self.predictinfill(labelctgy, am_subset, am_labels, \
                       df_train_fillfeatures_plug, df_test_fillfeatures_plug, \
                       randomseed, postprocess_dict, ML_cmnd, postprocess_dict['autoMLer'], printstatus, \
                       categorylist = categorylist)

    _infilla, _infillb, FSmodel, postprocess_dict = returned_sets
    
    return FSmodel
  
  def createFSsets(self, am_subset, column, columnslist, randomseed):
    '''
    returns a copy of am_subset in which each of the columns from columnslist 
    has been passed to the df_shuffle_series support function with randomseed
    (the received am_subset is left intact as the operation starts from a copy)
    
    the column argument is not inspected in this function
    
    hat tip for permutation method from "Beware Default Random Forest Importances"
    by Terence Parr, Kerem Turgutlu, Christopher Csiszar, and Jeremy Howard
    '''
    
    shuffled_df = am_subset.copy()
    
    for shuffle_target in columnslist:
      
      shuffled_df = self.df_shuffle_series(shuffled_df, shuffle_target, randomseed)
      
    return shuffled_df

  def createFSsets2(self, am_subset, column, columnslist, randomseed):
    '''
    similar to createFSsets except that the entry of columnslist matching 
    the column argument is left untouched, and the rest of columnslist 
    are passed to the df_shuffle_series support function with randomseed
    
    returns the resulting dataframe, which is derived from a copy of am_subset
    '''
    
    shuffled_df = am_subset.copy()
    
    for shuffle_target in columnslist:
      
      #this is the column to retain as is
      if shuffle_target == column:
        continue
      
      shuffled_df = self.df_shuffle_series(shuffled_df, shuffle_target, randomseed)
    
    return shuffled_df

  def shuffleaccuracy(self, np_shuffleset, np_labels, FSmodel, randomseed, label_categorylist, \
                      process_dict, labelctgy, postprocess_dict):
    '''
    measures accuracy of predictions of shuffleset (which had permutation method)
    against the model trained on the unshuffled set

    np_shuffleset and np_labels are now recast as pandas dataframe, leaving the "np" in place for convenience

    the predict function is accessed from postprocess_dict['autoMLer'] 
    based on the autoML_type from postprocess_dict['ML_cmnd'] and the type of ML application
    which in turn is based on the MLinfilltype of labelctgy in process_dict

    the metric returned is
    - for MLinfilltype numeric / concurrent_nmbr: 1 - mean squared log error 
      (evaluated on absolute values of labels and predictions)
    - for singlct / binary / concurrent_act / multirt: accuracy score
    - for 1010: accuracy score after translating labels with convert_1010_to_onehot

    note that if autoML_type was not specified in postprocess_dict['ML_cmnd'] 
    the default of 'randomforest' is recorded in that dictionary
    '''

    ML_cmnd = postprocess_dict['ML_cmnd']
    autoMLer = postprocess_dict['autoMLer']
    printstatus_for_predict = postprocess_dict['printstatus']

    #if autoML_type not specified than we'll apply default (randomforest)
    if 'autoML_type' not in ML_cmnd:
      ML_cmnd.update({'autoML_type' : 'randomforest'})
    #autoML_type will be one of our keys for autoMLer dictionary
    autoML_type = ML_cmnd['autoML_type']

    MLinfilltype = process_dict[labelctgy]['MLinfilltype']

    #the type of ML application is the second key for autoMLer dictionary
    application_by_MLinfilltype = {'numeric'         : 'regression', \
                                   'concurrent_nmbr' : 'regression', \
                                   'singlct'         : 'ordinalclassification', \
                                   'binary'          : 'booleanclassification', \
                                   'concurrent_act'  : 'booleanclassification', \
                                   'multirt'         : 'onehotclassification', \
                                   '1010'            : 'onehotclassification'}

    if MLinfilltype in application_by_MLinfilltype:
      ML_application = application_by_MLinfilltype[MLinfilltype]

    def _generate_predictions():
      #runs inference on the shuffle set with the predict function for this application
      return autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, FSmodel, np_shuffleset, printstatus_for_predict, label_categorylist)

    if MLinfilltype in ['numeric', 'concurrent_nmbr']:

      #absolute values are taken just in case this returned any negative predictions
      np_predictions = np.absolute(_generate_predictions())
      #and we're trying to generalize here so will go ahead and apply to labels
      np_labels = np.absolute(np_labels)

      columnaccuracy = 1 - mean_squared_log_error(np_labels, np_predictions)

    elif MLinfilltype in ['singlct', 'binary', 'concurrent_act', 'multirt']:

      np_predictions = _generate_predictions()

      columnaccuracy = accuracy_score(np_labels, np_predictions)

    elif MLinfilltype in ['1010']:

      #labels are translated to onehot prior to generating predictions
      np_labels = \
      self.convert_1010_to_onehot(np_labels)

      np_predictions = _generate_predictions()

      columnaccuracy = accuracy_score(np_labels, np_predictions)

    return columnaccuracy
  
  def assemblemadethecut(self, FScolumn_dict, featurepct, featuremetric, featuremethod, \
                         am_subset_columns, FSprocess_dict):
    '''
    takes as input the FScolumn_dict and the passed automunge arguments 
    featurepct, featuremetric, featuremethod
    and uses to assemble a list of columns that made it through the feature
    selection process
    
    columns are ranked by their 'metric' entry in FScolumn_dict from large to small
    (higher metric implies more predictive power associated with that column's feature,
    NaN metrics are ranked last, ties retain their order from FScolumn_dict)
    and then the number of top ranked columns retained is based on featuremethod:
    - 'default': all columns
    - 'pct': int(number of columns * featurepct)
    - 'metric': the columns with metric >= featuremetric
    - 'report': a single column (just a plug value)
    any other featuremethod prints an error message and is treated as 'default'
    (previously that case halted with an unassigned variable error)
    
    if any retained column is from a category with MLinfilltype '1010'
    the rest of the columns from its categorylist are also retained,
    this is to retain full sets if any 1010 sets returned
    
    am_subset_columns is not inspected in this function
    
    returns list madethecut
    '''
    
    #assemble a dataframe with a row for each column for sorting purposes
    #(built in a single operation instead of concatenating row by row)
    FSsupport_df = \
    pd.DataFrame([[key, FScolumn_dict[key]['metric'], FScolumn_dict[key]['category']] \
                  for key in FScolumn_dict], \
                 columns=['FS_column', 'metric', 'category'])
    
    #sort the rows by metric from large to small
    #mergesort is a stable sort, such that order of ties is deterministic
    FSsupport_df = FSsupport_df.sort_values(['metric'], ascending=False, kind='mergesort')
    
    #create list of ranked candidate entries for madethecut
    candidatefeaturerows = list(FSsupport_df['FS_column'])
    
    if featuremethod == 'pct':

      #calculate the number of features we'll keep using the ratio passed from automunge
      numbermakingcut = int(len(candidatefeaturerows) * featurepct)
      
    elif featuremethod == 'metric':
      
      #count the number of features meeting the metric threshold passed from automunge
      numbermakingcut = len(FSsupport_df[FSsupport_df['metric'] >= featuremetric])
      
    elif featuremethod == 'report':
      
      #just a plug value
      numbermakingcut = 1
      
    else:
      
      if featuremethod != 'default':
        print("error featuremethod object must be one of ['default', 'pct', 'metric', 'report']")
      
      #default is to keep all of the features
      numbermakingcut = len(candidatefeaturerows)
      
    #generate list of rows making the cut
    madethecut = candidatefeaturerows[:numbermakingcut]
    
    #this is to retain full sets if any 1010 sets returned
    #(iterating through a copy since entries may be appended to madethecut)
    for entry in madethecut.copy():
      if FSprocess_dict[FScolumn_dict[entry]['category']]['MLinfilltype'] == '1010':
        for entry2 in FScolumn_dict[entry]['categorylist']:
          if entry2 not in madethecut:
            madethecut.append(entry2)
    
    return madethecut

  def featureselect(self, df_train, labels_column, trainID_column, \
                    powertransform, binstransform, randomseed, \
                    numbercategoryheuristic, assigncat, transformdict, \
                    processdict, featurepct, featuremetric, featuremethod, \
                    ML_cmnd, process_dict, valpercent1, valpercent2, printstatus, \
                    NArw_marker, assignparam):
    """
    #featureselect is a function called within automunge() that applies methods
    #to evaluate predictive power of derived features towards a downstream model
    #such as to trim the branches of the transform tree.
    
    #workflow is as follows
    #- df_train is processed with a nested automunge() call (MLinfill off, PCA off,
    #featureselection off) carving out a validation set of size
    #valpercent1 + valpercent2 (or 0.2 when that sum is 0)
    #- the label columns are narrowed to the categorylist associated with the
    #'labelctgy' entry of process_dict for the label's root category
    #- a model is trained with trainFSmodel on the processed train set
    #- base accuracy is measured with shuffleaccuracy on the validation set
    #- for each derived column two shuffled validation sets are scored,
    #via createFSsets ('metric') and createFSsets2 ('metric2'),
    #each metric recorded as baseaccuracy minus the accuracy after the shuffle
    #- assemblemadethecut selects the columns to keep
    
    #df_train, labels_column, trainID_column, powertransform, binstransform,
    #randomseed, numbercategoryheuristic, assigncat, transformdict, processdict,
    #NArw_marker, assignparam and ML_cmnd are forwarded to the nested automunge() call
    #(ML_cmnd with 'PCA_type' overwritten to 'off' on a copy, assignparam as a copy)
    #featurepct, featuremetric, featuremethod are forwarded to assemblemadethecut
    #process_dict is used to look up the 'labelctgy' of the label's root category
    #printstatus activates the progress printouts when True
    
    #returns madethecut, FSmodel, FScolumn_dict, FS_sorted
    #- madethecut is the list returned by assemblemadethecut (or [] when halted)
    #- FSmodel is the model returned by trainFSmodel (or False when halted)
    #- FScolumn_dict has an entry per derived column with the recorded metrics
    #(or {} when halted)
    #- FS_sorted has keys 'baseaccuracy', 'metric_key', 'column_key',
    #'metric2_key', 'metric2_column_key' presenting the metrics in sorted form
    
    #The evaluation is halted when labels_column is False, when the nested
    #automunge() call returns an empty label set, or when trainFSmodel returns False.
    """
    
    #printout display progress
    if printstatus is True:
      print("_______________")
      print("Begin Feature Importance evaluation")
      print("")
    
    #these are the values returned by any of the halted scenarios
    #they are initialized up front so that every path reaching the
    #FS_sorted assembly below has them defined
    FSmodel = False
    baseaccuracy = False
    madethecut = []
    FScolumn_dict = {}
    FS_origcolumns = []
    
    if labels_column is False:
      
      #printout display progress
      if printstatus is True:
        print("_______________")
        print("No labels_column passed, Feature Importance halted")
        print("")
    
    else:

      #now we'll use automunge() to prepare the subset for feature evaluation
      #note the passed arguments, these are all intentional (no MLinfill applied,
      #primary goal here is to produce a processed dataframe for df_subset
      #with corresponding labels)

      #but first real quick we'll just deal with PCA default functionality for FS
      #(applied to a copy so the user passed ML_cmnd is not edited)
      FSML_cmnd = deepcopy(ML_cmnd)
      FSML_cmnd['PCA_type'] = 'off'

      #label smoothing is not applied for the feature importance evaluation
      #(making this user configurable through ML_cmnd is a future extension)
      FS_LabelSmoothing = False

      FS_assignparam = deepcopy(assignparam)

      totalvalidation = valpercent1 + valpercent2

      #a validation set is needed for the evaluation even if user didn't request one
      if totalvalidation == 0:
        totalvalidation = 0.2

      am_train, _1, am_labels, \
      am_validation1, _3, am_validationlabels1, \
      _5, _6, _7, \
      _8, _9, _10, \
      labelsencoding_dict, finalcolumns_train, _10,  \
      _11, FSpostprocess_dict = \
      self.automunge(df_train, df_test = False, labels_column = labels_column, trainID_column = trainID_column, \
                    testID_column = False, valpercent1 = totalvalidation, valpercent2 = 0.0, \
                    shuffletrain = True, TrainLabelFreqLevel = False, powertransform = powertransform, \
                    binstransform = binstransform, MLinfill = False, infilliterate=1, randomseed = randomseed, \
                    LabelSmoothing_train = FS_LabelSmoothing, excl_suffix = True, \
                    numbercategoryheuristic = numbercategoryheuristic, pandasoutput = True, NArw_marker = NArw_marker, \
                    featureselection = False, featurepct = 1.00, featuremetric = featuremetric, \
                    featuremethod = 'pct', ML_cmnd = FSML_cmnd, assigncat = assigncat, \
                    assigninfill = {'stdrdinfill':[], 'MLinfill':[], 'zeroinfill':[], 'oneinfill':[], \
                                   'adjinfill':[], 'meaninfill':[], 'medianinfill':[]}, \
                    assignparam = FS_assignparam, \
                    transformdict = transformdict, processdict = processdict, printstatus=printstatus)

      #this is the returned process_dict
      #(remember "processdict" is what we pass to automunge() call, "process_dict" is what is 
      #assembled inside automunge, there is a difference)
      FSprocess_dict = FSpostprocess_dict['process_dict']
      
      #source columns are recorded here so they are available to the
      #FS_sorted assembly for every scenario following the automunge() call
      FS_origcolumns = list(FSpostprocess_dict['origcolumn'])

      if am_labels.empty:
        
        #printout display progress
        if printstatus is True:
          print("_______________")
          print("No labels returned from automunge(.), Feature Importance halted")
          print("")
    
      #if am_labels is not an empty set
      else:

        #find origcateogry of am_labels from FSpostprocess_dict
        labelcolumnkey = list(am_labels)[0]
        origcategory = FSpostprocess_dict['column_dict'][labelcolumnkey]['origcategory']

        #find labelctgy from process_dict based on this origcategory
        labelctgy = process_dict[origcategory]['labelctgy']

        am_categorylist = []

        for am_label_column in am_labels.columns:

          if FSpostprocess_dict['column_dict'][am_label_column]['category'] == labelctgy:

            am_categorylist = FSpostprocess_dict['column_dict'][am_label_column]['categorylist']
            
            #we'll follow convention that if target label category MLinfilltype is concurrent
            #we'll arbitrarily take the first column and use that as target
            if FSpostprocess_dict['process_dict'][labelctgy]['MLinfilltype'] \
            in ['concurrent_act', 'concurrent_nmbr']:
              
              am_categorylist = [am_categorylist[0]]
              
            break

        if len(am_categorylist) == 0:
          if printstatus is True:
            #this is a remote edge case, printout added for troubleshooting support
            print("Label category processdict entry contained a labelctgy entry not found in transformdict entry")
            print("Feature Seclection model training will not run without valid labelgctgy processdict entry")
            print()

        elif len(am_categorylist) == 1:
          am_labels = pd.DataFrame(am_labels[am_categorylist[0]])
          am_validationlabels1 = pd.DataFrame(am_validationlabels1[am_categorylist[0]])

        else:
          am_labels = am_labels[am_categorylist]
          am_validationlabels1 = am_validationlabels1[am_categorylist]

        #if there's a bug occuring after this point it might mean the labelctgy wasn't
        #properly populated in the process_dict for the root category assigned to the labels
        #again the labelctgy entry to process_dict represents for labels returned in 
        #multiple configurations the transformation category whose returned set will be
        #used to train the feature selection model

        #printout display progress
        if printstatus is True:
          print("_______________")
          print("Training feature importance evaluation model")
          print("")

        #apply function trainFSmodel
        FSmodel = \
        self.trainFSmodel(am_train, am_labels, randomseed, \
                          FSprocess_dict, FSpostprocess_dict, labelctgy, ML_cmnd, \
                          printstatus)
        
        if FSmodel is False:
          
          #printout display progress
          if printstatus is True:
            print("_______________")
            print("No model returned from training, Feature Importance halted")
            print("")
        
        else:

          #update v2.11 baseaccuracy should be based on validation set
          baseaccuracy = self.shuffleaccuracy(am_validation1, am_validationlabels1, \
                                              FSmodel, randomseed, am_categorylist, \
                                              FSprocess_dict, labelctgy, FSpostprocess_dict)

          if printstatus is True:
            print("Base Accuracy of feature importance model:")
            print(baseaccuracy)
            print()

          #get list of columns
          am_train_columns = list(am_train)
          
          #assemble FScolumn_dict to support the feature evaluation
          for column in am_train_columns:

            FScolumn_entry = FSpostprocess_dict['column_dict'][column]

            #create entry to FScolumn_dict
            FScolumn_dict.update({column : {'categorylist' : FScolumn_entry['categorylist'], \
                                            'category' : FScolumn_entry['category'], \
                                            'columnslist' : FScolumn_entry['columnslist'], \
                                            'origcolumn' : FScolumn_entry['origcolumn'], \
                                            'FScomplete' : False, \
                                            'shuffleaccuracy' : None, \
                                            'shuffleaccuracy2' : None, \
                                            'baseaccuracy' : baseaccuracy, \
                                            'metric' : None, \
                                            'metric2' : None}})

          #printout display progress
          if printstatus is True:
            print("_______________")
            print("Evaluating feature importances")
            print("")

          #perform feature evaluation on each column
          for column in am_train_columns:

            #update version 1.80, FS is performed on columnslist instead of categorylist
            columnslist = FScolumn_dict[column]['columnslist']

            #the first metric is shared by all columns of a columnslist
            #so only evaluated once per columnslist
            if FScolumn_dict[column]['FScomplete'] is False:

              #create set with columns shuffle from columnslist
              shuffleset = self.createFSsets(am_validation1, column, columnslist, randomseed)

              #determine resulting accuracy after shuffle
              columnaccuracy = self.shuffleaccuracy(shuffleset, am_validationlabels1, \
                                                    FSmodel, randomseed, am_categorylist, \
                                                    FSprocess_dict, labelctgy, FSpostprocess_dict)

              #I think this will clear some memory
              del shuffleset

              #category accuracy penalty metric
              metric = baseaccuracy - columnaccuracy

              #save accuracy to FScolumn_dict and set FScomplete to True
              #(for each column in the columnslist)
              for categorycolumn in columnslist:

                FScolumn_dict[categorycolumn]['FScomplete'] = True
                FScolumn_dict[categorycolumn]['shuffleaccuracy'] = columnaccuracy
                FScolumn_dict[categorycolumn]['metric'] = metric

            #create second set with all but one columns shuffled from columnslist
            #this will allow us to compare the relative importance between columns
            #derived from the same parent
            shuffleset2 = self.createFSsets2(am_validation1, column, columnslist, randomseed)

            #determine resulting accuracy after shuffle
            columnaccuracy2 = self.shuffleaccuracy(shuffleset2, am_validationlabels1, \
                                                  FSmodel, randomseed, am_categorylist, \
                                                  FSprocess_dict, labelctgy, FSpostprocess_dict)

            metric2 = baseaccuracy - columnaccuracy2

            FScolumn_dict[column]['shuffleaccuracy2'] = columnaccuracy2
            FScolumn_dict[column]['metric2'] = metric2
          
          madethecut = self.assemblemadethecut(FScolumn_dict, featurepct, featuremetric, \
                                           featuremethod, am_train_columns, FSprocess_dict)
    
          #(future extension: if the only column left in madethecut from a source
          #column is a NArw, delete it from the set)
    
        #I think this will clear some memory
        del am_train, _1, am_labels, am_validation1, _3, \
        am_validationlabels1, _5, _6, _7, \
        _8, _9, labelsencoding_dict, finalcolumns_train, _10,  \
        FSpostprocess_dict

        if printstatus is True:
          print("_______________")
          print("Feature Importance results:")
          print("")

        #to inspect values returned in featureimportance object one could run
        if printstatus is True:
          for keys,values in FScolumn_dict.items():
            print(keys)
            print('metric = ', values['metric'])
            print('metric2 = ', values['metric2'])
            print("")

    FS_sorted = {'baseaccuracy' : baseaccuracy, \
                 'metric_key':{}, \
                 'column_key':{}, \
                 'metric2_key':{}, \
                 'metric2_column_key':{}}
    
    #first we'll handle first metric based on source column
    #(metric is shared across a source column's derived columns,
    #so the first derived column found is sufficient, hence the break)
    for FS_origcolumn in FS_origcolumns:
      for key in FScolumn_dict:
        if FScolumn_dict[key]['origcolumn'] == FS_origcolumn:
          FS_sorted['metric_key'].setdefault(FScolumn_dict[key]['metric'], []).append(FS_origcolumn)
          break
      
    #sorted from largest metric to smallest
    FS_sorted['metric_key'] = dict(sorted(FS_sorted['metric_key'].items(), reverse=True))
    
    #column_key is the inverse lookup, source column -> metric
    for key in FS_sorted['metric_key']:
      for entry in FS_sorted['metric_key'][key]:
        FS_sorted['column_key'].update({entry : key})
    
    #now for metric2 based on derived columns relative importance, note sorted in other order
    for FS_origcolumn in FS_origcolumns:
      FS_sorted['metric2_key'].update({FS_origcolumn : {}})
      for key in FScolumn_dict:
        if FScolumn_dict[key]['origcolumn'] == FS_origcolumn:
          FS_sorted['metric2_key'][FS_origcolumn].setdefault(FScolumn_dict[key]['metric2'], []).append(key)
    
    #sorted from smallest metric2 to largest
    for key in FS_sorted['metric2_key']:
      FS_sorted['metric2_key'][key] = dict(sorted(FS_sorted['metric2_key'][key].items(), reverse=False))
    
    #metric2_column_key is the inverse lookup, source column -> derived column -> metric2
    for key1 in FS_sorted['metric2_key']:
      FS_sorted['metric2_column_key'].update({key1 : {}})
      for key2 in FS_sorted['metric2_key'][key1]:
        for entry in FS_sorted['metric2_key'][key1][key2]:
          FS_sorted['metric2_column_key'][key1].update({entry : key2})
    
    if printstatus is True:
      print()
      print("______________________")
      print("sorted metric results:")
      print()
      for keys,values in FS_sorted['metric_key'].items():
        for entry in values:
          print(entry)
          print(keys)
          print()
      print("______________________")
      print("sorted metric2 results:")
      print()
      for key in FS_sorted['metric2_key']:
        print("for source column: ", key)
        for keys,values in FS_sorted['metric2_key'][key].items():
          for entry in values:
            print(entry)
            print(keys)
            print()
        print()
    
    #printout display progress
    if printstatus is True:
      print("_______________")
      print("Feature Importance evaluation complete")
      print("")
    
    return madethecut, FSmodel, FScolumn_dict, FS_sorted
  
  def assemblepostprocess_assigninfill(self, assigninfill, infillcolumns_list, 
                                       columns_train, postprocess_dict, MLinfill):
    """
    #this function converts user passed assigninfill
    #into a collection of post-transform column assignments
    #to various infill methods for application in apply infill functions
    
    #assigninfill is as passed by user, a dictionary of infill type -> list of columns
    #(note assigninfill previously had validations performed in check_assigninfill)
    #infillcolumns_list is all derived columns in train set
    #columns_train is all source columns in train set
    #postprocess_dict is how data shared between functions, here read for
    #postprocess_dict['origcolumn'][<source column>]['columnkeylist']
    #MLinfill is boolean marker for default MLinfill application
    
    #The convention is that unspecified columns are cast to
    #stdrdinfill or MLinfill based on MLinfill parameter
    #The other convention is that user may assign column headers
    #both with and without suffix appenders
    #the source column headers are converted to set of derived column headers
    #and then any user specified derived columns w/ suffix take precedence over the converted ones
    
    #so workflow is as follows
    #- received spec'd assigninfill (which may include both pre-suf and w/-suf)
    #- aggregate just spec'd w/-suf into mirror spec assigninfill_withsuffix
    #- aggregate spec'd pre-suf into mirror spec assigninfill_sourcecolumn
    #- identify unspecified source columns missing from assigninfill_sourcecolumn,
    #add as new category assigninfill_sourcecolumn['unspecified']
    #- convert source columns to derived columns from assigninfill_sourcecolumn
    #to assigninfill_sourcecolumn_converted
    #- if assigninfill_sourcecolumn_converted doesn't yet have entries for stdrdinfill or MLinfill, create
    #- based on MLinfill, copy entries from assigninfill_sourcecolumn_converted['unspecified']
    #into either 'stdrdinfill' or 'MLinfill'
    #(we'll keep 'unspecified' entry in case might be of use down the road)
    #- for duplicates between entries to assigninfill_withsuffix and assigninfill_sourcecolumn_converted
    #assigninfill_withsuffix takes precedence
    #so every such duplicate is removed from assigninfill_sourcecolumn_converted
    #- combine assigninfill_withsuffix and assigninfill_sourcecolumn_converted into
    #the returned set postprocess_assigninfill_dict
    #- insert any missing keys needed for apply_am_infill
    #- return postprocess_assigninfill_dict
    
    #returns postprocess_assigninfill_dict, a dictionary of
    #infill type -> list of derived columns, the passed assigninfill is not edited
    """
    
    #- received spec'd assigninfill (which may include both pre-suf and w/-suf)
    
    #- aggregate just spec'd w/-suf into mirror spec assigninfill_withsuffix
    assigninfill_withsuffix = {}
    for key in assigninfill:
      assigninfill_withsuffix[key] = \
      [entry for entry in assigninfill[key] if entry in infillcolumns_list]
          
    #- aggregate spec'd pre-suf into mirror spec assigninfill_sourcecolumn
    assigninfill_sourcecolumn = {}
    for key in assigninfill:
      assigninfill_sourcecolumn[key] = \
      [entry for entry in assigninfill[key] if entry in columns_train]
          
    #- identify unspecified source columns missing from assigninfill_sourcecolumn,
    #add as new category assigninfill_sourcecolumn['unspecified']
    #(dict.fromkeys removes duplicates while retaining the order of columns_train,
    #so the result is reproducible between runs, which a set difference is not)
    specd_sourcecolumns = set()
    for key in assigninfill_sourcecolumn:
      specd_sourcecolumns.update(assigninfill_sourcecolumn[key])
    unspecd_sourcecolumns = \
    [column for column in dict.fromkeys(columns_train) if column not in specd_sourcecolumns]
    assigninfill_sourcecolumn.update({'unspecified':unspecd_sourcecolumns})
    
    #- convert source columns to derived columns from assigninfill_sourcecolumn
    #to assigninfill_sourcecolumn_converted
    assigninfill_sourcecolumn_converted = {}
    for key in assigninfill_sourcecolumn:
      converted = []
      for entry in assigninfill_sourcecolumn[key]:
        #accessing derived columns from source column
        #(extending a fresh list so the columnkeylist in postprocess_dict is not edited)
        converted.extend(postprocess_dict['origcolumn'][entry]['columnkeylist'])
      assigninfill_sourcecolumn_converted[key] = converted
        
    #- if assigninfill_sourcecolumn_converted doesn't yet have entries for stdrdinfill or MLinfill, create
    assigninfill_sourcecolumn_converted.setdefault('stdrdinfill', [])
    assigninfill_sourcecolumn_converted.setdefault('MLinfill', [])
    
    #- based on MLinfill, copy entries from assigninfill_sourcecolumn_converted['unspecified']
    #into either 'stdrdinfill' or 'MLinfill'
    #(we'll keep 'unspecified' entry in case might be of use down the road)
    if MLinfill is True:
      defaultinfill = 'MLinfill'
    else:
      defaultinfill = 'stdrdinfill'
    assigninfill_sourcecolumn_converted[defaultinfill].extend( \
      assigninfill_sourcecolumn_converted['unspecified'])
      
    #- for duplicates between entries to assigninfill_withsuffix and assigninfill_sourcecolumn_converted
    #assigninfill_withsuffix takes precedence
    #note each list is rebuilt by filtering instead of calling .remove() while
    #iterating over it, since removal during iteration skips the entry following
    #each removed one and would leave some duplicates in place
    all_specd_withsuffix = set()
    for key in assigninfill_withsuffix:
      all_specd_withsuffix.update(assigninfill_withsuffix[key])
    for key in assigninfill_sourcecolumn_converted:
      assigninfill_sourcecolumn_converted[key] = \
      [entry for entry in assigninfill_sourcecolumn_converted[key] \
       if entry not in all_specd_withsuffix]
          
    #- combine assigninfill_withsuffix and assigninfill_sourcecolumn_converted into
    #the returned set postprocess_assigninfill_dict
    
    #first let's make sure they have equivalent keys
    for key1 in assigninfill_withsuffix:
      assigninfill_sourcecolumn_converted.setdefault(key1, [])
    for key2 in assigninfill_sourcecolumn_converted:
      assigninfill_withsuffix.setdefault(key2, [])
    
    #ok now populate 
    postprocess_assigninfill_dict = {}
    
    for key in assigninfill_sourcecolumn_converted:
      postprocess_assigninfill_dict.update({key: assigninfill_withsuffix[key] + assigninfill_sourcecolumn_converted[key]})
    
    #- insert any missing keys needed for apply_am_infill
    for infilltype in ('stdrdinfill', 'zeroinfill', 'oneinfill', 'naninfill', 'adjinfill', \
                       'medianinfill', 'meaninfill', 'modeinfill', 'lcinfill', 'MLinfill'):
      postprocess_assigninfill_dict.setdefault(infilltype, [])
    
    #- return postprocess_assigninfill_dict
    return postprocess_assigninfill_dict
  
  def apply_am_infill(self, df_train, df_test, postprocess_assigninfill_dict, \
                      postprocess_dict, infilliterate, printstatus, infillcolumns_list, \
                      masterNArows_train, masterNArows_test, process_dict, randomseed, ML_cmnd):
    """
    #Modularizes the application of infill to train and test sets
    """

    #infilliterate allows ML infill sets to run multiple times
    #as may be beneficial if set had a high proportion of infill for instance
    iteration = 0
    if infilliterate == 0:
      infilliterate = 1
      
    #if we're uysing this method we'll have some extra printouts
    if infilliterate > 1:
      print_infilliterate = True
    else:
      print_infilliterate = False
      
    #initialize validation results
    infill_validations = {}
      
    while iteration < infilliterate:
      
      #resent MLinfill infillcomplete markers to False
      if iteration > 0:
        for key in postprocess_assigninfill_dict['MLinfill']:
          postprocess_dict['column_dict'][key]['infillcomplete'] = False
      
      if printstatus is True:
        if print_infilliterate is True:
          print("______")
          print("ML infill infilliterate iteration: ", iteration + 1)
          print(" ")
          
      for column in infillcolumns_list:
          
        if column in postprocess_dict['column_dict']:
          
          if process_dict[postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
          not in ['boolexclude', 'totalexclude']:

            if iteration == 0:
              
              #stndrdinfill (just prinouts, this was done in processing funcitons)
              if column in postprocess_assigninfill_dict['stdrdinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: stdrdinfill")
                  print("")
                  
              #zeroinfill
              if column in postprocess_assigninfill_dict['zeroinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: zeroinfill")
                  print("")

                categorylistlength = len(postprocess_dict['column_dict'][column]['categorylist'])

                df_train = \
                self.zeroinfillfunction(df_train, column, postprocess_dict, \
                                        masterNArows_train)

                df_test = \
                self.zeroinfillfunction(df_test, column, postprocess_dict, \
                                        masterNArows_test)

              #oneinfill
              if column in postprocess_assigninfill_dict['oneinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: oneinfill")
                  print("")

                categorylistlength = len(postprocess_dict['column_dict'][column]['categorylist'])

                df_train = \
                self.oneinfillfunction(df_train, column, postprocess_dict, \
                                       masterNArows_train)

                df_test = \
                self.oneinfillfunction(df_test, column, postprocess_dict, \
                                       masterNArows_test)
                
              #naninfill
              if column in postprocess_assigninfill_dict['naninfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: naninfill")
                  print("")

                categorylistlength = len(postprocess_dict['column_dict'][column]['categorylist'])

                df_train = \
                self.naninfillfunction(df_train, column, postprocess_dict, \
                                       masterNArows_train)

                df_test = \
                self.naninfillfunction(df_test, column, postprocess_dict, \
                                       masterNArows_test)

              #adjinfill
              if column in postprocess_assigninfill_dict['adjinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: adjinfill")
                  print("")

                df_train = \
                self.adjinfillfunction(df_train, column, postprocess_dict, \
                                       masterNArows_train)

                df_test = \
                self.adjinfillfunction(df_test, column, postprocess_dict, \
                                       masterNArows_test)

              #medianinfill
              if column in postprocess_assigninfill_dict['medianinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: medianinfill")
                  print("")

                #check if column is boolean
                boolcolumn = False
                #exclude boolean and ordinal from this infill method
                if postprocess_dict['process_dict'][postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
                in ['multirt', 'singlct', 'binary', '1010', 'boolexclude', 'concurrent_act']:
                  boolcolumn = True

                categorylistlength = len(postprocess_dict['column_dict'][column]['categorylist'])

                if (categorylistlength == 1) \
                and boolcolumn is False:
                  #noting that currently we're only going to infill 0 for single column categorylists
                  #some comparable address for multi-column categories is a future extension

                  df_train, infillvalue = \
                  self.train_medianinfillfunction(df_train, column, postprocess_dict, \
                                                  masterNArows_train)

                  postprocess_dict['column_dict'][column]['normalization_dict'][column].update({'infillvalue':infillvalue})

                  df_test = \
                  self.test_medianinfillfunction(df_test, column, postprocess_dict, \
                                                 masterNArows_test, infillvalue)
              
              #meaninfill
              if column in postprocess_assigninfill_dict['meaninfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: meaninfill")
                  print("")

                #check if column is boolean
                boolcolumn = False
                #exclude boolean and ordinal from this infill method
                if postprocess_dict['process_dict'][postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
                in ['multirt', 'singlct', 'binary', '1010', 'boolexclude', 'concurrent_act']:
                  boolcolumn = True

                categorylistlength = len(postprocess_dict['column_dict'][column]['categorylist'])

                #if (column not in excludetransformscolumns) \
                if (categorylistlength == 1) \
                and boolcolumn is False:
                  #noting that currently we're only going to apply mean infill for single column categorylists
                  #some comparable address for multi-column categories is a future extension

                  df_train, infillvalue = \
                  self.train_meaninfillfunction(df_train, column, postprocess_dict, \
                                                masterNArows_train)

                  postprocess_dict['column_dict'][column]['normalization_dict'][column].update({'infillvalue':infillvalue})

                  df_test = \
                  self.test_meaninfillfunction(df_test, column, postprocess_dict, \
                                               masterNArows_test, infillvalue)
              
              #modeinfill
              if column in postprocess_assigninfill_dict['modeinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: modeinfill")
                  print("")

                #check if column is excluded (variable poorly named, interpret boolcolumn here as excluded)
                boolcolumn = False
                
                #seems reasonable to exclude concurrent_nmbr from mode
                if postprocess_dict['process_dict'][postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
                in ['boolexclude', 'concurrent_nmbr']:
                  boolcolumn = True

                if boolcolumn is False:

                  df_train, infillvalue = \
                  self.train_modeinfillfunction(df_train, column, postprocess_dict, \
                                                masterNArows_train)

                  postprocess_dict['column_dict'][column]['normalization_dict'][column].update({'infillvalue':infillvalue})

                  df_test = \
                  self.test_modeinfillfunction(df_test, column, postprocess_dict, \
                                               masterNArows_test, infillvalue)
              
              #lcinfill:
              if column in postprocess_assigninfill_dict['lcinfill']:

                #printout display progress
                if printstatus is True:
                  print("infill to column: ", column)
                  print("     infill type: lcinfill")
                  print("")

                #check if column is excluded (variable poorly named, interpret boolcolumn here as excluded)
                boolcolumn = False

                #seems reasonable to exclude concurrent_nmbr from lcinfill (consistent with modeinfill)
                if postprocess_dict['process_dict'][postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
                in ['boolexclude', 'concurrent_nmbr']:
                  boolcolumn = True

                if boolcolumn is False:

                  df_train, infillvalue = \
                  self.train_lcinfillfunction(df_train, column, postprocess_dict, \
                                                masterNArows_train)

                  postprocess_dict['column_dict'][column]['normalization_dict'][column].update({'infillvalue':infillvalue})

                  #repurpose modeinfillfunction for test, only difference is the passed infillvalue
                  df_test = \
                  self.test_modeinfillfunction(df_test, column, postprocess_dict, \
                                               masterNArows_test, infillvalue)

            #MLinfill:
            if column in postprocess_assigninfill_dict['MLinfill']:

              #printout display progress
              if printstatus is True:
                print("infill to column: ", column)
                print("     infill type: MLinfill")
                print("")

              infill_validations = \
              self.check_ML_infill(df_train, column, postprocess_dict, infill_validations)

              df_train, df_test, postprocess_dict = \
              self.MLinfillfunction(df_train, df_test, column, postprocess_dict, \
                                    masterNArows_train, masterNArows_test, randomseed, ML_cmnd, \
                                    printstatus)

      for columnname in df_train.columns:
        postprocess_dict['column_dict'][columnname]['infillcomplete'] = False
      
      iteration += 1
    
    return df_train, df_test, postprocess_dict, infill_validations
  
  def apply_pm_infill(self, df_test, postprocess_assigninfill_dict, \
                      postprocess_dict, printstatus, infillcolumns_list, \
                      masterNArows_test, process_dict):
    """
    #Modularizes the application of infill to test sets
    """
    
    #infilliterate allows ML infill sets to run multiple times
    #as may be beneficial if set had a high number of infill for instance
    iteration = 0
    
    #if we're using this method we'll have some extra printouts
    if postprocess_dict['infilliterate'] > 1:
      print_infilliterate = True
    else:
      print_infilliterate = False
    
    #just the convention
    if postprocess_dict['infilliterate'] == 0:
      postprocess_dict['infilliterate'] = 1
    
    while iteration < postprocess_dict['infilliterate']:
      
      #reset MLinfill infillcomplete markers to False
#       if iteration > 0:
      for key in postprocess_assigninfill_dict['MLinfill']:
        postprocess_dict['column_dict'][key]['infillcomplete'] = False
      
      if printstatus is True:
        if print_infilliterate is True:
          print("______")
          print("ML infill infilliterate iteration: ", iteration + 1)
          print(" ")
    
      for column in infillcolumns_list:
        
        if process_dict[postprocess_dict['column_dict'][column]['category']]['MLinfilltype'] \
        not in ['boolexclude', 'totalexclude']:

          if iteration == 0:
            
            #stdrdinfill (just printouts, this was done in processing functions)
            if column in postprocess_assigninfill_dict['stdrdinfill']:

              #printout display progress
              if printstatus is True:
                print("infill to column: ", column)
                print("     infill type: stdrdinfill")
                print("")

            #zeroinfill:
            if column i