import numpy as np 
import os 


def max_scale(x,y,xv,yv): # train features, train y, test features, test y
	"""Scale train and test data by the training set's max absolute values.

	Each feature column is divided by the largest absolute value found in
	that column of ``x``; the targets are divided by the largest absolute
	value found in ``y``. The same training factors are applied to the test
	arrays ``xv`` and ``yv``.

	All four arrays are modified in place and are also returned, so callers
	may use either the arguments or the returned tuple.

	Args:
		x: training features.
		y: training targets.
		xv: test features.
		yv: test targets.

	Returns:
		The tuple ``(x, y, xv, yv)`` holding the (now scaled) input arrays.
	"""
	sx = np.max(abs(x), axis=0) # scaling factor x : sx
	sy = np.max(abs(y))
	# A column (or target) that is identically zero has a scale of 0; dividing
	# by it would turn the data into NaN. Use 1 instead so zeros stay zeros.
	sx = np.where(sx == 0, 1.0, sx)
	if sy == 0:
		sy = 1.0
	x /= sx
	y /= sy
	xv /= sx
	yv /= sy
	return (x,y,xv,yv)

# Datasets stored in one comma-separated file (no separate test split).
# Each entry is (feature_cols, target_col, log_target, center_target):
#   feature_cols  -- slice selecting the feature columns, or None to use
#                    every column except target_col
#   target_col    -- index of the target column
#   log_target    -- take the natural log of the target first
#   center_target -- subtract the target mean before scaling
_SINGLE_FILE_LAYOUTS = {
	"twitter":    (slice(None, -1), -1, False, False),
	"sml":        (slice(None, -1), -1, False, True),
	"pol":        (slice(None, -1), -1, False, True),
	"autos":      (slice(None, -1), -1, True,  True),
	"energy":     (slice(None, -1), -1, True,  False),
	"machine":    (slice(None, -1), -1, True,  False),
	"puma":       (slice(None, -1), -1, False, True),
	"parkinsons": (None,             5, False, True),
	"concrete":   (slice(None, -4), -4, False, False),
	"skill":      (None,            12, False, True),
	"bike":       (slice(1, -1),    -1, False, False),
}

# Datasets that come with a separate test file.
# Each entry is (delimiter, feature_cols, target_col, recipe) where delimiter
# is passed to np.loadtxt (None means whitespace) and recipe names the
# preprocessing applied in load().
_TRAIN_TEST_LAYOUTS = {
	"diabetes": (',',  slice(None, -1), -1, "scale_then_center"),
	"naval":    (None, slice(None, -2), -1, "scale_then_center"),
	"airfoil":  (',',  slice(None, -1), -1, "center_then_scale"),
	"gas":      (',',  slice(None, -1), -1, "center_then_scale"),
	"indoor":   (',',  slice(None, -9), -8, "min_offset"), # target at -8
}


def _center_on_train(x, y, xv, yv):
	# Shift all four arrays by the TRAINING means. The means are taken before
	# anything is shifted, so the test split is centred with the same
	# statistics as the training split.
	x_mean = np.average(x, axis=0)
	y_mean = np.average(y, axis=0)
	return (x - x_mean, y - y_mean, xv - x_mean, yv - y_mean)


def _min_offset_scale(x, y, xv, yv):
	# Shift by the training minima, then divide by the training max abs value.
	# The tiny epsilon keeps the scale non-zero for constant columns.
	x_offset = np.min(x, axis=0)
	y_offset = np.min(y)
	x = x - x_offset
	y = y - y_offset
	xv = xv - x_offset
	yv = yv - y_offset
	sx = np.max(abs(x + 0.000000001), axis=0) # scaling factor x : sx
	sy = np.max(abs(y + 0.000000001))
	return (x / sx, y / sy, xv / sx, yv / sy)


def _load_single_file(datafile, layout):
	# Load a single-file dataset, centre the features (and optionally the
	# target), then max-scale both.
	feature_cols, target_col, log_target, center_target = layout
	table = np.loadtxt(datafile, delimiter=',')
	y = table[:, target_col]
	if feature_cols is None:
		# Features are every column except the target column.
		x = np.hstack((table[:, :target_col], table[:, target_col + 1:]))
	else:
		x = table[:, feature_cols]
	if log_target:
		y = np.log(y)

	x = x - np.average(x, axis=0)
	if center_target:
		y = y - np.average(y, axis=0)

	sx = np.max(abs(x), axis=0)
	# A constant feature column is all zeros after centring; scale it by 1
	# so it stays zero instead of becoming NaN.
	sx = np.where(sx == 0, 1.0, sx)
	sy = np.max(abs(y))
	return (x / sx, y / sy, None, None)


def load(datafile,testfile):
	"""Load and normalise one of the known regression datasets.

	The dataset is identified by the base name of ``datafile`` without its
	extension (e.g. ``path/to/airfoil.csv`` -> ``"airfoil"``).

	* ``diabetes``, ``naval``: max-scaled with ``max_scale`` and then centred
	  on the training means.
	* ``airfoil``, ``gas``: centred on the training means and then max-scaled
	  with ``max_scale``.
	* ``indoor``: shifted by the training minima and max-scaled.
	* every name in ``_SINGLE_FILE_LAYOUTS``: only ``datafile`` is read;
	  features are centred, the target is optionally log-transformed and/or
	  centred, and both are max-scaled.

	Test data is always transformed with statistics computed on the training
	data.

	Args:
		datafile: path of the training data file.
		testfile: path of the test data file; ignored for single-file
			datasets.

	Returns:
		A tuple ``(x, y, xv, yv)`` of train features, train targets, test
		features and test targets. ``xv`` and ``yv`` are ``None`` for
		single-file datasets. ``None`` is returned if the dataset name is
		not recognised.
	"""
	name = os.path.splitext(os.path.basename(datafile))[0]
	print("Loading "+name+" dataset")

	if name in _SINGLE_FILE_LAYOUTS:
		return _load_single_file(datafile, _SINGLE_FILE_LAYOUTS[name])
	if name not in _TRAIN_TEST_LAYOUTS:
		# Unknown dataset: keep the historical behaviour of returning None.
		return None

	delimiter, feature_cols, target_col, recipe = _TRAIN_TEST_LAYOUTS[name]
	train = np.loadtxt(datafile, delimiter=delimiter)
	test = np.loadtxt(testfile, delimiter=delimiter)
	x = train[:, feature_cols]
	y = train[:, target_col]
	xv = test[:, feature_cols] # x test (xv)
	yv = test[:, target_col] # y test (yv)

	if recipe == "scale_then_center":
		x, y, xv, yv = max_scale(x, y, xv, yv)
		return _center_on_train(x, y, xv, yv)
	if recipe == "center_then_scale":
		return max_scale(*_center_on_train(x, y, xv, yv))
	return _min_offset_scale(x, y, xv, yv)


def format_dataset(features,y,intercept = True): 
	"""Assemble features and targets into a single 2-D array.

	The result has one row per row of ``features``. Its columns are the
	feature columns, then (when ``intercept`` is true) a column of ones,
	then the targets ``y`` reshaped to a single column.

	Args:
		features: 2-D array of features, one sample per row.
		y: targets; must contain exactly one value per row of ``features``.
		intercept: whether to insert the column of ones before the targets.

	Returns:
		The horizontally stacked array described above.
	"""
	n_rows = features.shape[0]
	columns = [features]
	if intercept:
		# Constant column of ones placed between the features and the target.
		columns.append(np.ones((n_rows, 1)))
	columns.append(y.reshape((n_rows, 1)))
	return np.hstack(columns)


















