from sklearn.ensemble import AdaBoostClassifier
import numpy as np

class OverlapMaj:
    """Majority vote over AdaBoost classifiers trained on overlapping sub-samples.

    The training set is recursively cut into four chunks with
    ``np.array_split``.  At every level the first chunk is recursed into,
    while two of the remaining three chunks are added to the data carried
    down to the leaves; the three possible "keep two of three" choices yield
    three branches per level.  Each leaf is one sub-sample, and one
    ``AdaBoostClassifier`` is trained per sub-sample.

    NOTE(review): the scheme resembles the overlapping sub-sample
    construction used in optimal PAC-learning majority votes -- inferred
    from the code structure only; confirm against the intended reference.

    ``predict`` counts the hypotheses' outputs by summing them, so labels are
    assumed to be 0/1.
    """

    # For branch digit 0, 1, 2: indices of the two chunks (out of chunks
    # 1..3) that are appended to the carried data.  Chunk 0 is always the
    # part that is split further.
    _KEPT_CHUNKS = ((2, 3), (1, 3), (1, 2))

    def __init__(self, n_weak_classifiers, n_voting_classifiers=None, weak_classifier=None):
        """Store the configuration.

        Args:
            n_weak_classifiers: ``n_estimators`` passed to every
                ``AdaBoostClassifier``.
            n_voting_classifiers: number of sub-samples (and thus voters) to
                draw at random; ``None`` means use every sub-sample.
            weak_classifier: ``estimator`` passed to every
                ``AdaBoostClassifier``.
        """
        self.n_voting_classifiers = n_voting_classifiers
        self.n_weak_classifiers = n_weak_classifiers
        self.weak_classifier = weak_classifier
        self.hypotheses = []

    @staticmethod
    def _n_levels(n_samples):
        """Return how many times the samplers split ``n_samples`` rows.

        Splitting stops once fewer than 4 rows remain; each split recurses
        into the first chunk of ``np.array_split(..., 4)``, which holds
        ``ceil(n / 4)`` rows.  The number of distinct sub-samples is
        ``3 ** levels``.
        """
        levels = 0
        while n_samples >= 4:
            n_samples = -(-n_samples // 4)  # ceil division: size of chunk 0
            levels += 1
        return levels

    def fit(self, X, y):
        """Train one AdaBoost classifier per sub-sample of ``(X, y)``.

        Uses every sub-sample when ``n_voting_classifiers`` is ``None`` or
        exceeds the number of available sub-samples; otherwise draws that
        many distinct sub-samples at random.  Any previously fitted
        hypotheses are discarded.

        Side effect kept from the original implementation:
        ``n_voting_classifiers`` is overwritten with the number of
        classifiers actually trained.

        Args:
            X: 2-D array-like of features (``X.shape[1]`` is read).
            y: 1-D array-like of labels aligned with ``X``.

        Returns:
            ``self``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_available = 3 ** self._n_levels(len(X))
        if self.n_voting_classifiers is None or self.n_voting_classifiers > n_available:
            X_subs, y_subs = self.sub_sample(
                X, y, np.array([]).reshape(0, X.shape[1]), np.array([])
            )
        else:
            X_subs, y_subs = self.rand_sub_sample(X, y, self.n_voting_classifiers)

        # Build into a fresh list so a refit never mixes old and new voters
        # and a failure midway leaves the previous ensemble untouched.
        hypotheses = []
        for X_sub, y_sub in zip(X_subs, y_subs):
            ada = AdaBoostClassifier(
                estimator=self.weak_classifier,
                n_estimators=self.n_weak_classifiers,
                algorithm='SAMME',
            )
            ada.fit(X_sub, y_sub)
            # Kept from the original: force the two-class code path.  Per the
            # original note, a sub-sample containing a single class otherwise
            # makes AdaBoostClassifier throw an error.
            ada.n_classes_ = 2
            hypotheses.append(ada)
        self.hypotheses = hypotheses
        self.n_voting_classifiers = len(hypotheses)
        return self

    def sub_sample(self, X, y, Xs, ys):
        """Enumerate every sub-sample of ``(X, y)``.

        Args:
            X, y: rows still to be split.
            Xs, ys: rows already carried down from outer levels; callers
                start with empty arrays of shape ``(0, n_features)`` and
                ``(0,)``.

        Returns:
            ``(X_subs, y_subs)``: two parallel lists of arrays, one entry per
            sub-sample, ordered branch 0, 1, 2 at every level.
        """
        if len(X) < 4:
            return [np.vstack([Xs, X])], [np.concatenate([ys, y])]
        X_parts = np.array_split(X, 4)
        y_parts = np.array_split(y, 4)
        X_subs, y_subs = [], []
        for first, second in self._KEPT_CHUNKS:
            branch_X, branch_y = self.sub_sample(
                X_parts[0],
                y_parts[0],
                np.vstack([Xs, X_parts[first], X_parts[second]]),
                np.concatenate([ys, y_parts[first], y_parts[second]]),
            )
            X_subs += branch_X
            y_subs += branch_y
        return X_subs, y_subs

    def rand_sub_sample(self, X, y, n):
        """Draw ``n`` distinct sub-samples uniformly at random.

        Indices are drawn without replacement from the exact number of
        sub-samples (``3 ** levels``), so no two returned sub-samples follow
        the same branch path.

        Args:
            X: 2-D array-like of features.
            y: 1-D array-like of labels aligned with ``X``.
            n: number of sub-samples to draw.

        Returns:
            ``(X_subs, y_subs)``: two parallel lists with ``n`` arrays each.

        Raises:
            ValueError: if ``n`` exceeds the number of distinct sub-samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_available = 3 ** self._n_levels(len(X))
        if n > n_available:
            raise ValueError(
                f"Cannot draw {n} distinct sub-samples: only {n_available} exist "
                f"for {len(X)} samples."
            )
        choices = np.random.choice(n_available, n, replace=False)
        X_subs, y_subs = [], []
        for choice in choices:
            X_sub, y_sub = self.rand_sub_sample_one(
                X, y, int(choice), np.array([]).reshape(0, X.shape[1]), np.array([])
            )
            X_subs.append(X_sub)
            y_subs.append(y_sub)
        return X_subs, y_subs

    def rand_sub_sample_one(self, X, y, choice, Xs, ys):
        """Build the single sub-sample addressed by ``choice``.

        ``choice`` is read as a base-3 number, least significant digit first;
        each digit selects the branch taken at one split level.

        Args:
            X, y: rows still to be split.
            choice: integer index of the sub-sample.
            Xs, ys: rows already carried down from outer levels.

        Returns:
            ``(X_sub, y_sub)`` arrays for the selected sub-sample.
        """
        if len(X) < 4:
            return np.vstack([Xs, X]), np.concatenate([ys, y])
        X_parts = np.array_split(X, 4)
        y_parts = np.array_split(y, 4)
        remaining, digit = divmod(choice, 3)
        first, second = self._KEPT_CHUNKS[digit]
        return self.rand_sub_sample_one(
            X_parts[0],
            y_parts[0],
            remaining,
            np.vstack([Xs, X_parts[first], X_parts[second]]),
            np.concatenate([ys, y_parts[first], y_parts[second]]),
        )

    def predict(self, X):
        """Return the strict-majority vote of all hypotheses as 0/1 integers.

        A sample is labelled 1 when the sum of the hypotheses' predictions
        exceeds ``len(self.hypotheses) // 2``.

        Raises:
            ValueError: if no hypotheses have been fitted.
        """
        if not self.hypotheses:
            raise ValueError("OverlapMaj has no fitted hypotheses; call fit() first.")
        votes = np.sum([h.predict(X) for h in self.hypotheses], axis=0)
        return (votes > len(self.hypotheses) // 2).astype(int)

    def score(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == np.asarray(y))


if __name__ == '__main__':
    # Demo: draw three random sub-samples from 100 one-feature rows with
    # all-ones labels and print the feature arrays, a blank line, then the
    # label arrays.
    X = np.arange(100).reshape(-1, 1)
    y = np.ones(100)
    classifier = OverlapMaj(n_weak_classifiers=10, n_voting_classifiers=3)
    X_subs, y_subs = classifier.rand_sub_sample(X, y, 3)
    print(X_subs, "", y_subs, sep="\n")

