import numpy as np 
from sklearn import linear_model

class Sampler():
    '''
    Linear regression fitted on a random subset of the rows of a dataset.

    The dataset is a 2-D array whose last column holds the target value
    and whose remaining columns hold the features.
    '''

    def __init__(self, method = "RS"):
        '''
        Choose the row-sampling strategy that fit(data, M) will use.

        method == "RS": uniform random sampling of rows.
        anything else : norm-weighted sampling (called "LS", leverage
                        sampling, in this file).

        Example:
        S = Sampler(method = "RS")
        theta = S.fit(data, 10)

        Row indices come from NumPy's global random state, so a new
        subset is drawn on every call to fit unless that state is reseeded.
        '''
        # fit is bound per instance; only the exact string "RS" selects
        # uniform sampling, every other value falls through to LS.
        self.fit = self._fit_random if method == "RS" else self._fit_leverage

    def _fit_random(self, dataset, n_samples):
        '''
        Fit on n_samples rows drawn uniformly, without replacement.

        Returns the coef_ of a LinearRegression fitted without intercept.
        '''
        features, targets = dataset[:, :-1], dataset[:, -1]
        picked = np.random.choice(
            features.shape[0], size = n_samples, replace = False)
        model = linear_model.LinearRegression(fit_intercept = False)
        return model.fit(features[picked, :], targets[picked]).coef_

    def _fit_leverage(self, dataset, n_samples):
        '''
        Fit on n_samples rows drawn without replacement, each row picked
        with probability proportional to the Euclidean norm of its
        feature vector.

        Returns the coef_ of a LinearRegression fitted without intercept.
        '''
        features, targets = dataset[:, :-1], dataset[:, -1]
        row_norms = np.linalg.norm(features, axis = 1)
        weights = row_norms / np.sum(row_norms)  # normalise to sum to 1
        picked = np.random.choice(
            features.shape[0], size = n_samples, replace = False, p = weights)
        model = linear_model.LinearRegression(fit_intercept = False)
        return model.fit(features[picked, :], targets[picked]).coef_

class Clarkson2009():
    '''
    Linear regression on a random-sign sketch of the data.

    For a data matrix A (n x d) and a target vector B (n), the intended
    problem is
        argmin || A*x - B ||
    solved on a sketched version of A and B:
        A' = S^T * A    (m x d)
        B' = S^T * B    (m)
    where S is an (n x m) random matrix of +/- 1s.

    Two ways to build the sketch:
      * streaming: call add(xn, yn) once per row, then get_model();
        S is never materialized, one fresh row of S is drawn per add().
      * batch: call fit(dataset), which materializes S for the whole
        dataset (last column = target) and returns the model.
    '''

    def __init__(self, m, d, debug = False):
        '''
        m     = sketch size (number of rows kept in A' and B')
        d     = dimensionality (number of feature columns)
        debug = stored on the instance; not read anywhere in this class
        '''
        self.D = d
        self.M = m
        self.As = np.zeros((m,d)) # A sketch
        self.Bs = np.zeros((m,1)) # B sketch
        self.debug = debug
        self.n = 0  # number of rows folded into the sketch so far

    def add(self, xn, yn):
        '''
        Fold one observation into the sketch.

        xn = feature vector with D entries (any shape reshapable to (1, D))
        yn = target value for that row

        Draws one new row s of S (M random +/- 1s) and applies
            A' += s^T * xn
            B' += s^T * yn
        '''
        signs = 2 * np.random.randint(0, 2, size=(1, self.M)) - 1
        xn = np.reshape(xn, (1, self.D))

        # (M,1) * (1,D) broadcasts to the M x D outer product; a single
        # vectorized update replaces the element-by-element Python loop.
        self.As += signs.T * xn
        # Bs is (M,1) after __init__ but 1-D after fit()/get_model_debug(),
        # so shape the update to whatever layout Bs currently has.
        self.Bs += np.reshape(signs[0] * yn, self.Bs.shape)

        self.n += 1

    def get_model_debug(self, x, y):
        '''
        Not streaming, just for debugging purposes: rebuild the sketch from
        the full data (x, y) with a fixed seed and return the fitted
        coefficients.

        Side effects: reseeds NumPy's *global* random state with 0 and
        overwrites the current sketch (As, Bs). self.n is left untouched.
        '''
        n_rows = x.shape[0]
        np.random.seed(0)
        S = 2 * np.random.randint(0, 2, size=(n_rows, self.M)) - 1
        self.As = np.dot(S.T, x)
        self.Bs = np.dot(S.T, y)
        return self.get_model()

    def get_model(self):
        '''
        Fit a LinearRegression on the current sketch (As, Bs) and return
        its coef_.

        NOTE(review): LinearRegression is used with its default settings,
        so an intercept is fitted on the sketched rows and then discarded.
        The objective documented on this class has no intercept term --
        confirm whether fit_intercept=False was intended here.
        '''
        clf = linear_model.LinearRegression()
        clf.fit(self.As, self.Bs)
        return clf.coef_

    def size(self):
        '''Number of values stored in the sketch: M*D for A' plus M for B'.'''
        return (self.M*self.D + self.M)

    def fit(self, dataset):
        '''
        Batch mode: sketch the whole dataset at once and return the model.

        dataset = 2-D array; the last column is the target, the others are
                  the features.

        Overwrites the current sketch and sets self.n to the number of rows.
        Here S is materialized as an (n x M) array of random +/- 1s.
        '''
        x = dataset[:,:-1]
        y = dataset[:,-1]
        self.n = x.shape[0]
        S = 2 * np.random.randint(0, 2, size=(self.n, self.M)) - 1
        self.As = np.dot(S.T, x)
        self.Bs = np.dot(S.T, y)
        return self.get_model()

def optimal_linregress(x,y,intercept = True):
    '''
    Fit an ordinary LinearRegression on the full data (x, y).

    intercept = True : return the coefficients with the fitted intercept
                       appended as the last entry.
    intercept = False: return only the coefficients.

    NOTE(review): the regression itself is always fitted with sklearn's
    default settings (i.e. with an intercept); the flag only controls
    whether that intercept is included in the returned vector -- confirm
    this is the intended meaning of intercept = False.
    '''
    fitted = linear_model.LinearRegression().fit(x, y)
    if not intercept:
        return fitted.coef_
    return np.append(fitted.coef_, [fitted.intercept_])
