import numpy as np 
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn import preprocessing
import sys


def genMoon(n, p, k, PRINT=False):
	'''
		Draw a standardised "two moons" classification sample.

		Input:
			n: sample count handed to make_moons
			p: dimension; currently ignored
			k: number of classes; not used for generation, only returned as-is
			PRINT: when True, plot the two classes on the first two coordinates

		Output:
			X: design matrix after StandardScaler (fitted on this same sample)
			y: response vector of class labels
			k: the value of k that was passed in

		The noise level of the moons is fixed at 0.5.
	'''

	points, labels = datasets.make_moons(n_samples=n, shuffle=True, noise=0.5)

	# fit the scaler on the sample itself, then apply it
	points = preprocessing.StandardScaler().fit(points).transform(points)

	if not PRINT:
		return points, labels, k

	plt.figure(0)
	for cls, fmt, name in ((0, 'k.', 'Cluster 1'), (1, 'r.', 'Cluster 2')):
		members = labels == cls
		plt.plot(points[members, 0], points[members, 1], fmt, label=name)
	plt.title('X1 vs X2')
	plt.legend()
	plt.show()

	return points, labels, k


def gen_Logis(n, p, X=None, beta=None, PRINT=False):
	'''
		Generate logistic regression data

		Input:
			n: number of rows to draw; only used when X is None
			p: number of columns to draw; only used when X is None
			X: optional design matrix; when None an (n, p) matrix of
			   iid N(0, 1) entries is drawn
			beta: optional coefficient vector; when None a vector of ones
			      matching the number of columns of X is used
			PRINT: when True, plot the first two columns of X split by y
			       (this needs X to have at least two columns)

		Output:
			X: the design matrix that was used
			y: 0/1 responses, one Bernoulli(prob) draw per row
			prob: success probabilities 1/(1+exp(-X beta))
	'''

	if X is None:
		X = np.random.normal(loc=0.0, scale=1.0, size=(n, p))

	if beta is None:
		# Size the default from X, not from p: when the caller passes X,
		# p is redundant and a mismatch used to crash in X.dot(beta).
		beta = np.ones(X.shape[-1])

	mu = X.dot(beta)
	prob = 1/(1+np.exp(-mu))
	# one trial per row; the trial count is an integer
	y = np.random.binomial(1, prob)

	if PRINT:
		plt.figure(0)
		plt.plot(X[y==0, 0], X[y==0, 1], 'k.', label='Cluster 1')
		plt.plot(X[y==1, 0], X[y==1, 1], 'r.', label='Cluster 2')
		plt.title('X1 vs X2')
		plt.legend()
		plt.show()

	return X, y, prob


def genCir(n, p, s, PRINT=False):
	'''
		Draw a two-class concentric circles sample.

		p is currently not used; the plan is to generate a p-dim ball and
		truncate it into k classes according to the quantiles of the
		chi square distribution.

		Input:
			n: sample count handed to make_circles
			p: dimension (ignored for now)
			s: sd of noise
			PRINT: accepted but not used by this function

		Output:
			X: design matrix returned by make_circles
			y: response vector of class labels
			k: number of classes, always 2 for now
	'''

	# the inner circle is half the size of the outer one (factor=0.5)
	points, labels = datasets.make_circles(n_samples=n, shuffle=True, noise=s, factor=0.5)
	n_classes = 2

	return points, labels, n_classes


def gen0(n, p, s, PRINT=False):
	'''
		Draw a linear regression sample with all coefficients equal to 1.

		Input:
			n: datasize
			p: dimension
			s: sd of noise
			PRINT: when True, scatter y against the first column of X

		Output:
			X: nxp design matrix with iid N(0, 1) entries
			y: n response vector, row sums of X plus noise
	'''

	# the design is drawn first, then the noise
	design = np.random.normal(loc=0.0, scale=1.0, size=(n, p))
	noise = s * np.random.normal(loc=0.0, scale=1.0, size=n)

	coef = np.ones(p)
	response = np.dot(design, coef) + noise

	if not PRINT:
		return design, response

	plt.figure(0)
	plt.scatter(design[:, 0], response)
	plt.ylabel('y')
	plt.xlabel('X1')
	plt.show()

	return design, response


def gen1(n, p, s, PRINT=False):
	'''
		generate two y (tasks) from the same x

		A: from linear regression with coefficients 1,...,1
		B: from linear regression with the centred coefficients
		   -(p-1)/2, -(p-1)/2 + 1, ..., (p-1)/2

		X: rows are iid N(0, cov) with cov[i, j] = 0.9^|i-j|

		Input:
			n: datasize
			p: dimension
			s: sd of noise (independent draws for the two tasks)
			PRINT: when True, scatter ya against the first column of X

		Output:
			X: nxp design matrix
			ya, yb: n response vectors of task A and task B
			mua, mub: the noise-free means X beta_a and X beta_b
	'''

	# feature matrix: covariance decays geometrically with the index gap.
	# Built by broadcasting instead of a Python double loop.
	idx = np.arange(p)
	cov = np.power(0.9, np.abs(idx[:, None] - idx[None, :]))
	X = np.random.multivariate_normal(np.zeros(p), cov, size=n)

	# noise (drawn after X, task A before task B)
	va = s * np.random.normal(loc=0.0, scale=1.0, size=n)
	vb = s * np.random.normal(loc=0.0, scale=1.0, size=n)

	# coefficient
	beta_a = np.ones(p)
	beta_b = np.arange(p)-(p-1)/2.0

	# observation
	mua = X.dot(beta_a)
	ya = mua + va
	mub = X.dot(beta_b)
	yb = mub + vb

	if PRINT:
		plt.figure(0)
		plt.scatter(X[:, 0], ya)
		plt.ylabel('y')
		plt.xlabel('X1')
		plt.show()

	return X, ya, yb, mua, mub


def gen2(n, p, s, PRINT=False):
	'''
		Two regression tasks sharing one Friedman1 design.

		A: linear in X with coefficients 1,...,1
		B: the Friedman1 mean function

		Input:
			n: datasize
			p: number of features handed to make_friedman1
			s: sd of noise (independent draws for the two tasks)
			PRINT: when True, scatter ya against the first column of X

		Output:
			X: design matrix returned by make_friedman1
			ya, yb: noisy responses of task A and task B
			mua, mub: the corresponding noise-free means
	'''

	# noise=0, so the second return value is the clean Friedman1 mean
	design, friedman_mean = datasets.make_friedman1(n_samples=n, n_features=p, noise=0)

	# both noise vectors are drawn right after the design, A before B
	noise_a = s * np.random.normal(loc=0.0, scale=1.0, size=n)
	noise_b = s * np.random.normal(loc=0.0, scale=1.0, size=n)

	# task A mean: unit coefficients on every feature
	linear_mean = design.dot(np.ones(p))

	resp_a = linear_mean + noise_a
	resp_b = friedman_mean + noise_b

	if not PRINT:
		return design, resp_a, resp_b, linear_mean, friedman_mean

	plt.figure(0)
	plt.scatter(design[:, 0], resp_a)
	plt.ylabel('y')
	plt.xlabel('X1')
	plt.show()

	return design, resp_a, resp_b, linear_mean, friedman_mean


def gen3tasks(n, s, PRINT=False):
	'''
		generate three y (tasks) from the same x

		Each task is a linear regression on the same 6 features. In the
		active configuration all three coefficient vectors are 2,...,2,
		so the tasks differ only through their independent noise draws.

		X: rows are iid N(0, cov) with cov[i, j] = 0.9^|i-j|

		Original note on feature groups (not used by the code below;
		meaning to be confirmed with the author):
		S1, S2, S3 = np.array([0,1,2]), np.array([3,4]), np.array([5])
		S1 -> S2, S2 -> S3, S3 -> S1

		Input:
			n: datasize
			s: sd of noise
			PRINT: when True, scatter y1 against the first column of X

		Output:
			X: nx6 design matrix
			y1, y2, y3: n response vectors of the three tasks
			p: number of features (always 6)
	'''

	# feature matrix: covariance decays geometrically with the index gap.
	# Built by broadcasting instead of a Python double loop.
	p = 6
	idx = np.arange(p)
	cov = np.power(0.9, np.abs(idx[:, None] - idx[None, :]))
	X = np.random.multivariate_normal(np.zeros(p), cov, size=n)

	# coefficient
	v = 2.0

	# Alternative configurations kept from earlier experiments:
	#   "1 needs 3, 3 needs 1":
	#     beta1 = [v, 0, 0, 0, v, v]
	#     beta2 = [0, 0, v, v, 0, 0]  (2 works alone)
	#       or    [v, 0, v, 0, v, 0]  (2 needs 1 and 3)
	#     beta3 = [v, v, 0, 0, v, 0]
	#   "1, 2, 3 need each other":
	#     beta1 = [v, 0, v, 0, v, 0]
	#     beta2 = [0, v, 0, v, 0, v]
	#     beta3 = [v, 0, 0, v, v, 0]

	beta1 = np.ones(p) * v
	beta2 = np.ones(p) * v
	beta3 = np.ones(p) * v

	# observation (noise is drawn in task order 1, 2, 3)
	mu1 = X.dot(beta1)
	y1 = mu1 + s * np.random.normal(loc=0.0, scale=1.0, size=n)
	mu2 = X.dot(beta2)
	y2 = mu2 + s * np.random.normal(loc=0.0, scale=1.0, size=n)
	mu3 = X.dot(beta3)
	y3 = mu3 + s * np.random.normal(loc=0.0, scale=1.0, size=n)

	if PRINT:
		plt.figure(0)
		# plot the first task; the old code referenced an undefined `ya`
		plt.scatter(X[:, 0], y1)
		plt.ylabel('y')
		plt.xlabel('X1')
		plt.show()

	return X, y1, y2, y3, p

# Script entry point. The body is only the placeholder string below, so
# running this file directly currently does nothing; importing it as a
# module never enters this branch.
if __name__ == '__main__':
	'''
		This area is to test this script
		Not affect any other script once this script is imported as a module

	'''
