import numpy as np
from sklearn.preprocessing import OneHotEncoder

def generateTwoGroupsData(alphaMin: float, n: int = 200, seed: int = 0):
    """Generate a synthetic 2-D binary classification set with two groups.

    Each sample is drawn from an isotropic Gaussian (standard deviation 0.5)
    centred on its group's mean: ``[1.5, 0]`` for the minority group and
    ``[-1.5, 0]`` for the majority group.  Labels are the sign of a
    group-specific linear score whose decision boundary passes through the
    group's mean, so the two groups follow different labelling rules.

    Parameters
    ----------
    alphaMin
        Probability that a sample belongs to the minority group.
    n
        Number of samples to generate.
    seed
        Value passed to ``np.random.seed``.  Note that this reseeds NumPy's
        *global* random state as a side effect.

    Returns
    -------
    X : ndarray of shape (n, 2)
        Sample features.
    y_hot : ndarray of float64, shape (n, n_classes)
        One-hot encoded labels.  Columns follow the sorted distinct label
        values found in the data (normally ``-1`` then ``+1``), so a class
        that never occurs gets no column.
    z : ndarray of bool, shape (n,)
        Group indicator: ``True`` for majority samples, ``False`` for
        minority samples.
    """
    muMin = np.array([1.5, 0])
    muMaj = np.array([-1.5, 0])
    wMin = np.array([1.5, np.sqrt(8) / 3])
    wMaj = np.array([-1.5, np.sqrt(8) / 3])

    d = 2
    sigma = 0.5

    # The global seed and the order of the two draws below are kept as-is so
    # that a given seed keeps reproducing exactly the same dataset.
    np.random.seed(seed)
    X = np.random.normal(0, scale=sigma, size=(n, d))
    z = np.random.choice([False, True], size=n, p=[alphaMin, 1 - alphaMin])

    # Shift the zero-mean noise onto each group's centre.
    X[z, :] = X[z, :] + muMaj
    X[~z, :] = X[~z, :] + muMin

    # Noise-free labels: which side of the group's own hyperplane (through
    # the group mean) the sample falls on.
    y = np.zeros(n)
    y[z] = np.sign(np.dot(X[z, :], wMaj) - np.dot(wMaj, muMaj))
    y[~z] = np.sign(np.dot(X[~z, :], wMin) - np.dot(wMin, muMin))

    # One-hot encode with plain NumPy.  This matches a dense OneHotEncoder
    # with categories='auto' (sorted distinct values, float64 output) without
    # depending on the `sparse` keyword, which newer scikit-learn releases
    # no longer accept.
    classes = np.unique(y)
    y_hot = (y[:, np.newaxis] == classes).astype(np.float64)

    return X, y_hot, z