# See Appendix for preprocessing steps.

import numpy as np
from sklearn.linear_model import LinearRegression


class DataPreprocessor(object):
    """Clip, rescale and orient per-element data series.

    Samples are collected with :meth:`append_data` / :meth:`append_data_list`;
    each sample holds one value per element.  Stacked and transposed they form
    a matrix with one row per element and one column per sample.

    :meth:`setup_preprocess` derives two things from that matrix:

    * ``clip_val`` -- the ``clip_percentile``-th percentile of the first
      ``num_subselect`` rows, multiplied by ``nice_mult``.  Data is clipped to
      ``[0, clip_val]`` and rescaled so that ``clip_val`` maps to ``scaler``.
    * ``inverse_map`` -- one boolean per row, True when the least-squares
      slope of the rescaled row against the sample index (normalised to
      ``[0, 1]``) exceeds ``trend_threshold``.  Such rows are mirrored as
      ``scaler - value`` by the transforms.

    Parameters
    ----------
    num_elements : number of rows every transformed array must have.
    clip_percentile : percentile (0-100) used to derive ``clip_val``.
    scaler : value that ``clip_val`` is mapped to after rescaling.
    nice_mult : multiplier applied to the percentile to get ``clip_val``.
    num_subselect : number of leading rows used for the percentile;
        defaults to ``num_elements``.
    """

    # Slope (in rescaled units over the whole sample range) above which a row
    # is mirrored.  Class-level so it can be overridden per subclass/instance.
    trend_threshold = 0.03

    def __init__(self, num_elements, clip_percentile=95, scaler=10.0, nice_mult=1.3, num_subselect=None):
        self.num_elements = num_elements
        self.raw_data = []
        self.clip_percentile = clip_percentile
        self.scaler = scaler
        self.clip_val = 0.0
        self.inverse_map = []
        self.nice_mult = nice_mult
        self.num_subselect = num_elements if num_subselect is None else num_subselect

    # ------------------------------------------------------------------ #
    # Data collection
    # ------------------------------------------------------------------ #
    def append_data(self, elem):
        """Store one sample (a sequence with one value per element)."""
        self.raw_data.append(np.array(elem))

    def append_data_list(self, elemlist):
        """Store every sample of the iterable ``elemlist``."""
        self.raw_data.extend(np.array(elem) for elem in elemlist)

    # ------------------------------------------------------------------ #
    # Private helpers
    # ------------------------------------------------------------------ #
    def _check_rows(self, arr):
        """Raise ValueError unless ``arr`` has ``num_elements`` rows."""
        if arr.ndim == 0 or arr.shape[0] != self.num_elements:
            raise ValueError(
                "expected an array with {} rows, got shape {}".format(
                    self.num_elements, arr.shape))

    def _flip_mask(self):
        """Return the first ``num_elements`` entries of ``inverse_map`` as a bool array."""
        return np.asarray(self.inverse_map[:self.num_elements], dtype=bool)

    def _flip_rows(self, arr):
        """Mirror, in place, the rows flagged in ``inverse_map`` as ``scaler - row``."""
        mask = self._flip_mask()
        arr[mask] = self.scaler - arr[mask]
        return arr

    def _float_copy(self, values):
        """Return a validated copy of ``values`` with a non-integer dtype.

        The copy protects the caller's array from the in-place row flip; the
        float promotion prevents ``scaler - row`` from being truncated when
        integer data is passed in.
        """
        arr = np.array(values)
        if not np.issubdtype(arr.dtype, np.inexact):
            arr = arr.astype(float)
        self._check_rows(arr)
        return arr

    def _signed_diagonal(self, scale):
        """Return the diagonal matrix with ``-scale`` on flipped rows, ``scale`` elsewhere."""
        diag = np.full(self.num_elements, float(scale))
        diag[self._flip_mask()] *= -1.0
        return np.diag(diag)

    # ------------------------------------------------------------------ #
    # Fitting
    # ------------------------------------------------------------------ #
    def setup_preprocess(self):
        """Compute ``clip_val`` and ``inverse_map`` from the stored raw data.

        Raises
        ------
        ValueError
            If no raw data is stored (or it does not form a 2-D matrix), or if
            the resulting clip value is not strictly positive, in which case
            the ``[0, clip_val]`` rescaling is undefined.
        """
        working = np.array(self.raw_data, dtype=float).T
        if working.ndim != 2 or working.size == 0:
            raise ValueError(
                "raw data must form a non-empty 2-D matrix; "
                "append samples before preprocessing")

        subset = working[0:self.num_subselect, :]
        clip_val = np.percentile(subset, self.clip_percentile) * self.nice_mult
        if not clip_val > 0.0:
            raise ValueError(
                "clip value must be positive, got {!r}".format(clip_val))
        self.clip_val = clip_val

        scaled = np.clip(working, 0.0, clip_val) / (clip_val / self.scaler)

        num_rows, num_samples = scaled.shape
        if num_samples < 2:
            # A single sample carries no trend information; flip nothing.
            self.inverse_map = [False] * num_rows
            return

        # Closed-form ordinary-least-squares slope of every row against the
        # sample index normalised to [0, 1]:
        #     slope = sum((x - mean(x)) * y) / sum((x - mean(x)) ** 2)
        xs = np.arange(num_samples, dtype=float) / (num_samples - 1)
        centred = xs - xs.mean()
        slopes = np.dot(scaled, centred) / np.dot(centred, centred)
        self.inverse_map = (slopes > self.trend_threshold).tolist()

    # ------------------------------------------------------------------ #
    # Forward / inverse transforms
    # ------------------------------------------------------------------ #
    def transform_y(self, ys):
        """Clip, rescale and orient ``ys`` (rows = elements).

        ``setup_preprocess`` is re-run on every call, so the clip value and
        flip map always reflect the currently stored raw data.

        Returns a new float array; ``ys`` itself is not modified.
        Raises ValueError if ``ys`` does not have ``num_elements`` rows.
        """
        ys = np.asarray(ys)
        self._check_rows(ys)

        self.setup_preprocess()
        scaled = np.clip(ys, 0.0, self.clip_val) / (self.clip_val / self.scaler)
        return self._flip_rows(scaled)

    def inverse_transform(self, tys):
        """Undo the row flips and the rescaling of ``tys``.

        Clipping is lossy and is not undone.  Returns a new array.
        Raises ValueError if ``tys`` does not have ``num_elements`` rows.
        """
        unflipped = self._flip_rows(self._float_copy(tys))
        return unflipped * (self.clip_val / self.scaler)

    def inverse_transform_noscale(self, tys):
        """Undo only the row flips of ``tys``, leaving values in rescaled units.

        Returns a new array.
        Raises ValueError if ``tys`` does not have ``num_elements`` rows.
        """
        return self._flip_rows(self._float_copy(tys))

    def inverse_transform_cov(self, cov):
        """Map a covariance matrix of transformed values back to original units.

        Computes ``A cov A^T`` where ``A`` is diagonal with entries
        ``+/- clip_val / scaler`` (negative for flipped rows).
        """
        a = self._signed_diagonal(self.clip_val / self.scaler)
        return np.dot(np.dot(a, cov), a.T)

    def inverse_transform_cov_noscale(self, cov):
        """Undo only the sign effect of the row flips on a covariance matrix.

        Computes ``A cov A^T`` where ``A`` is diagonal with entries ``+/- 1``
        (negative for flipped rows).
        """
        a = self._signed_diagonal(1.0)
        return np.dot(np.dot(a, cov), a.T)

    def preprocess_dataset(self):
        """Transform the stored raw data itself and return the result."""
        return self.transform_y(np.array(self.raw_data).T)
