import pdb
from copy import copy
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torch.autograd import Variable
from utilityScript import standardizeData

class SCEVisualizer(nn.Module):
	def __init__(self, netConfig=None):
		"""Store the network configuration; no layers are created here.

		netConfig: optional dict. When non-empty it must contain 'inputDim',
			'hL' (list of encoder layer sizes) and 'hActFunc' (list of
			activation names, one per entry of 'hL'). Optional keys:
			'l1Penalty', 'l2Penalty', 'errorFunc', 'oActFunc',
			'linearDecoder' and 'dropoutRate' (list, one rate per entry of 'hL').
			When omitted or empty a default set-up is used
			(one hidden layer of 2 units with tanh activation).

		The lists taken from netConfig are copied before being mirrored,
		so the caller's dict is never modified.
		"""
		super(SCEVisualizer, self).__init__()
		if netConfig is None:
			netConfig = {}

		if len(netConfig) != 0:
			self.inputDim,self.outputDim = netConfig['inputDim'],netConfig['inputDim']
			# encoder sizes, and the mirrored (encoder + decoder) sizes used after pre-training
			self.hLayer = copy(netConfig['hL'])
			self.hLayerPost = self.hLayer + self.hLayer[-2::-1]

			self.l1Penalty = netConfig.get('l1Penalty',0.0)
			self.l2Penalty = netConfig.get('l2Penalty',0.0)
			self.errorFunc = netConfig.get('errorFunc','MSE')
			self.oActFunc = netConfig.get('oActFunc','linear')
			self.linearDecoder = netConfig.get('linearDecoder',False)

			self.hActFunc = copy(netConfig['hActFunc'])
			if not self.linearDecoder:
				# decoder mirrors the encoder activations
				self.hActFuncPost = self.hActFunc + self.hActFunc[-2::-1]
			else:
				# decoder layers are purely linear
				self.hActFuncPost = self.hActFunc + ['linear']*(len(self.hActFunc)-1)

			# check for dropout
			self.dropoutFlag = False
			if 'dropoutRate' in netConfig:
				# copy first: extending netConfig's own list would corrupt the config for reuse
				rates = copy(netConfig['dropoutRate'])
				self.dropoutRate = rates + rates[-2::-1]
				self.dropoutFlag = True
		else:#for default set up
			# inputDim is unknown here; it must be assigned before fit() is called
			self.inputDim,self.outputDim = None,None
			self.hLayer,self.hLayerPost = [2],[2]
			self.l1Penalty,self.l2Penalty = 0.0,0.0
			self.oActFunc,self.errorFunc = 'linear','MSE'
			self.linearDecoder = False
			# activations are indexed per layer elsewhere, so they are kept as lists
			self.hActFunc,self.hActFuncPost = ['tanh'],['tanh']
			self.dropoutFlag = False

		self.device = None
		# postTrain reads this flag; it is off unless the caller switches it on
		self.bottleneckCons = False
		self.Lambda1 = 1.0
		self.Lambda2 = 1.0
		#internal variables
		self.epochError=[]
		self.trMu=[]
		self.trSd=[]
		self.tmpPreHActFunc=[]
		self.currentLayer = None
		self.classCenters = {}

	def initNet(self,input_size,hidden_layer):
		self.hidden=nn.ModuleList()
		# Hidden layers
		if len(hidden_layer)==1:
			self.hidden.append(nn.Linear(input_size,hidden_layer[0]))
		elif(len(hidden_layer)>1):
			for i in range(len(hidden_layer)-1):
				if i==0:
					self.hidden.append(nn.Linear(input_size, hidden_layer[i]))
					self.hidden.append(nn.Linear(hidden_layer[i], hidden_layer[i+1]))
				else:
					self.hidden.append(nn.Linear(hidden_layer[i],hidden_layer[i+1]))
		self.reset_parameters(hidden_layer)
		# Output layer
		self.out = nn.Linear(hidden_layer[-1], input_size)

	def reset_parameters(self,hidden_layer):
		#pdb.set_trace()
		tmpActFunc = self.hActFunc[:int(np.ceil(len(hidden_layer)/2))]
		for i in range(len(tmpActFunc)-1,0,-1):
			tmpActFunc.extend([tmpActFunc[i-1]])
		hL = 0
		
		while True:
			#pdb.set_trace()
			if tmpActFunc[hL].upper() in ['SIGMOID','TANH']:
				#pdb.set_trace()
				torch.nn.init.xavier_uniform_(self.hidden[hL].weight)
				if self.hidden[hL].bias is not None:
					torch.nn.init.zeros_(self.hidden[hL].bias)
				#continue
			elif tmpActFunc[hL].upper() == 'RELU':
				torch.nn.init.kaiming_uniform_(self.hidden[hL].weight, mode='fan_in', nonlinearity='relu')
				if self.hidden[hL].bias is not None:
					torch.nn.init.zeros_(self.hidden[hL].bias)
			elif tmpActFunc[hL].upper() == 'LRELU':
				torch.nn.init.kaiming_uniform_(self.hidden[hL].weight, mode='fan_in', nonlinearity='leaky_relu')
				if self.hidden[hL].bias is not None:
					torch.nn.init.zeros_(self.hidden[hL].bias)
			if hL == len(hidden_layer)-1:
				break
			hL += 1

	def forwardPost(self, x):
		# Feedforward
		if self.dropoutFlag is False:
			for l in range(len(self.hidden)):
				if self.hActFuncPost[l].upper()=='SIGMOID':
					x = torch.sigmoid(self.hidden[l](x))
				elif self.hActFuncPost[l].upper()=='TANH':
					x = torch.tanh(self.hidden[l](x))
				elif self.hActFuncPost[l].upper()=='RELU':
					x = torch.relu(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='LRELU':
					x = F.leaky_relu(self.hidden[l](x),inplace=False)
				else:#default is linear				
					x = self.hidden[l](x)
				if l == int(len(self.hidden)/2):# bottleneck layer used
					y = x #bottleneck output
				
			if self.oActFunc.upper()=='SIGMOID':
				return y,torch.sigmoid(self.out(x))
			else:
				return y,self.out(x)
		else:
			#pdb.set_trace()
			for l in range(len(self.hidden)):
				if self.hActFuncPost[l].upper()=='SIGMOID':
					x = torch.sigmoid(self.hidden[l](x))
				elif self.hActFuncPost[l].upper()=='TANH':
					x = torch.tanh(self.hidden[l](x))
				elif self.hActFuncPost[l].upper()=='RELU':
					x = torch.relu(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='LRELU':
					x = F.leaky_relu(self.hidden[l](x),inplace=False)
				else:#default is linear				
					x = self.hidden[l](x)
				
				#apply dropout
				x = nn.Dropout(self.dropoutRate[l])(x)

				if l == int(len(self.hidden)/2):# bottleneck layer used
					y = x #bottleneck output
				
			if self.oActFunc.upper()=='SIGMOID':
				return y,torch.sigmoid(self.out(x))
			else:
				return y,self.out(x)

	def forwardPre(self, x):
		# Feedforward
		if self.dropoutFlag is False:
			for l in range(len(self.hidden)):
				if self.tmpPreHActFunc[l].upper()=='SIGMOID':
					x = torch.sigmoid(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='TANH':
					x = torch.tanh(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='RELU':
					x = torch.relu(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='LRELU':
					x = F.leaky_relu(self.hidden[l](x),inplace=False)
				else:#default is linear
					x = self.hidden[l](x)

			if self.oActFunc.upper()=='SIGMOID':
				return torch.sigmoid(self.out(x))
			else:
				return self.out(x)
		else:
			#pdb.set_trace()
			for l in range(len(self.hidden)):
				if self.tmpPreHActFunc[l].upper()=='SIGMOID':
					x = torch.sigmoid(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='TANH':
					x = torch.tanh(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='RELU':
					x = torch.relu(self.hidden[l](x))
				elif self.tmpPreHActFunc[l].upper()=='LRELU':
					x = F.leaky_relu(self.hidden[l](x),inplace=False)
				else:#default is linear
					x = self.hidden[l](x)
				x = nn.Dropout(self.dropoutRate[self.currentLayer])(x)
				#print('Dropout for hidden layer',self.currentLayer)

			if self.oActFunc.upper()=='SIGMOID':
				return torch.sigmoid(self.out(x))
			else:
				return self.out(x)

	def setHiddenWeight(self,W,b):
		for i in range(len(self.hidden)):
			self.hidden[i].bias.data=b[i].float()
			self.hidden[i].weight.data=W[i].float()

	def setOutputWeight(self,W,b):
		self.out.bias.data=b.float()
		self.out.weight.data=W.float()

	def returnTransformedData(self,x):
		fOut=[x]
		with torch.no_grad():#we don't need to compute gradients (for memory efficiency)
			for layer in self.hidden:
				fOut.append(self.hiddenActivation(layer(fOut[-1])))
			if self.output_activation.upper()=='SIGMOID':
				fOut.append(torch.sigmoid(self.out(fOut[-1])))
			else:
				fOut.append(self.out(fOut[-1]))
		return fOut[1:]#Ignore the original input
		
	def createOutputAsCentroids(self,data,label):
		#pdb.set_trace()
		centroidLabels=np.unique(label)
		outputData=np.zeros([np.shape(data)[0],np.shape(data)[1]])
		for i in range(len(centroidLabels)):
			indices=np.where(centroidLabels[i]==label)[0]
			tmpData=data[indices,:]
			centroid=np.mean(tmpData,axis=0)
			outputData[indices,]=centroid
			self.classCenters['Cls'+str(int(centroidLabels[i]))+'_Cen'+str(0)] = centroid
		return outputData

	def createEncoderOutputAsCentroids(self,D,L):
		#D: an [n x d] matrix
		#	n: no of samples
		#	d: dimension of data
		outputData = torch.zeros([D.shape[0],D.shape[1]],device=self.device)
		nClass = len(torch.unique(L))
		with torch.no_grad():#I don't need to calculate gradient for this operations
			for i in range(nClass):
				indices = torch.where(L==i)[0]
				tmpData = D[indices,:]
				centroid = torch.mean(tmpData,axis=0)
				outputData[indices,] = centroid
		#pdb.set_trace()
		return outputData
		
	def createOutputWithMultiCenter(self,trData,trLabels,clusterList,maxItr):
		"""Build targets made of k-means cluster centres computed per class.

		trData: [n x d] numpy array of samples.
		trLabels: numpy array with one label per sample.
		clusterList: clusterList[i] is the number of centres for the i-th
			class in np.unique(trLabels) order.
		maxItr: max_iter passed to KMeans.

		Returns the per-class target blocks stacked in class order, so the
		rows only line up with trData when trData is grouped by label in that
		same order. Every centre is also stored in self.classCenters under
		'Cls<label>_Cen<index>'.
		"""
		# use kMean to cluster each class
		from sklearn.cluster import KMeans
		perClassTargets = []
		classes = np.unique(trLabels)
		for i,cls in enumerate(classes):
			# rows belonging to this class
			rows = np.where(trLabels == cls)[0]
			classData = trData[rows,:]
			nCenters = clusterList[i]
			# targets for this class only, same shape as classData
			classTargets = np.zeros([len(classData),np.shape(classData)[1]])
			kmeans = KMeans(n_clusters=nCenters, n_init=25, max_iter=maxItr, random_state=0).fit(classData)
			print('Running kMean for class',cls.astype(int),'No of centers:',nCenters)
			# every cluster member gets its cluster centre as target
			for c in range(nCenters):
				center = kmeans.cluster_centers_[c]
				classTargets[np.where(c == kmeans.labels_)[0],:] = center
				self.classCenters['Cls'+str(int(cls))+'_Cen'+str(int(c))] = center
			perClassTargets.append(classTargets)
		return np.vstack((perClassTargets))
		
	def sortDataOnLabel(self,trData,trLabels):
		from operator import itemgetter
		trLabels = trLabels.reshape(-1,1)
		lTrData = np.hstack((trData,trLabels))
		sortedData = np.array(sorted(lTrData, key=itemgetter(-1)))
		return sortedData[:,:-1], sortedData[:,-1].reshape(-1,1)
		
	def returnBottleneckCentroids(self,D,L):
		#D: an [n x d] matrix
		#       n: no of samples
		#       d: dimension of data
		classes = torch.unique(L)
		classCombo = torch.combinations(classes,2)
		outputDataDict = {}
		
		for c in classes:
			indices = torch.where(L==c)[0]
			centroid = torch.mean(D[indices,:],axis=0)
			outputDataDict[int(c)] = centroid
			#pdb.set_trace()
		return outputDataDict,classCombo
		
	def calcInterClassDistances(self,D,L,criterion):
		#D: an [n x d] matrix
		#       n: no of samples
		#       d: dimension of data
		#This function will calculate the distance from the centroid of each class pairs in bottleneck
		#Take the logarithm of the distances and maximises the sum of the logs.
		
		nClass = len(torch.unique(L))
		nSample = len(D)
		C,classCombo = self.returnBottleneckCentroids(D,L)
		#outputData = torch.zeros([nSample*(nClass-1),D.shape[1]],device=self.device)
		separationLoss = 0
		
		for combo in classCombo:
				class0 = int(combo[0])
				class1 = int(combo[1])
				#separationLoss += torch.log(criterion(C[class0],C[class1]))
				separationLoss += 1/(1+torch.norm((C[class0]-C[class1]),dim=0))
		#pdb.set_trace()
		#return -separationLoss/len(classCombo)
		return separationLoss
		
	def createNeighInputTarget(self,trData,trLabels,neighParam=5):
		"""Build (input, target) pairs from per-class k-nearest-neighbour groups.

		trData: [n x d] numpy array of samples.
		trLabels: numpy array with one label per sample.
		neighParam: number of neighbours passed to kneighbors_graph
			(built with include_self=True).

		For every sample, the rows of its neighbour group (taken within its
		own class) are emitted as inputs and the mean of that group, repeated
		once per row, as targets.

		Returns (neigh_input, neigh_target), both stacked into 2-D arrays.
		"""
		from sklearn.neighbors import kneighbors_graph
		neigh_input,neigh_target = [],[]
		for cls in np.unique(trLabels):
			# samples of the current class
			classData = trData[np.where(trLabels == cls)[0],:]

			print('Creating knn graph with',neighParam,'neighbors for class',cls)
			adjacency = kneighbors_graph(classData, neighParam, mode='connectivity', include_self=True).toarray()
			for j in range(len(adjacency)):
				# columns flagged 1 in row j are the neighbours of sample j
				members = np.where(adjacency[j,:]==1)[0]
				group = classData[members,:]
				neigh_input.append(group)
				neigh_target.append(np.tile(np.mean(group,axis=0),(len(members),1)))
		neigh_input = np.vstack((neigh_input))
		neigh_target = np.vstack((neigh_target))

		return neigh_input,neigh_target

	def preTrain(self,dataLoader,learningRate,batchSize,numEpochs,verbose):

		# set device
		#device = self.device
		
		#loop to do layer-wise pre-training
		for d in range(len(self.hLayer)):
			self.currentLayer = d
			#set the hidden layer structure for a bottleneck architecture
			hidden_layer=self.hLayer[:d+1]
			self.tmpPreHActFunc=self.hActFunc[:d+1]
			for i in range(len(hidden_layer)-1,0,-1):
				hidden_layer.extend([hidden_layer[i-1]])
				self.tmpPreHActFunc.extend([self.tmpPreHActFunc[i-1]])

			if verbose:
				if d==0:
					print('Pre-training layer [',self.inputDim,'-->',hidden_layer[0],'-->',self.inputDim,']')
				else:
					index=int(len(hidden_layer)/2)
					print('Pre-training layer [',hidden_layer[index-1],'-->',hidden_layer[index],'-->',hidden_layer[index+1],']')			

			#initialize the network weight and bias
			self.initNet(self.inputDim,hidden_layer)

			#freeze pretrained layers
			if d>0:
				j=0#index for preW and preB
				for l in range(len(hidden_layer)):
					if (l==d) or (l==(d+1)):
						continue
					else:
						self.hidden[l].weight=preW[j]
						self.hidden[l].weight.requires_grad=False
						self.hidden[l].bias=preB[j]
						self.hidden[l].bias.requires_grad=False
						j+=1
				self.out.weight=preW[-1]
				self.out.weight.requires_grad=False
				self.out.bias=preB[-1]
				self.out.bias.requires_grad=False

			# set loss function
			if self.errorFunc.upper() == 'CE':
				criterion = nn.CrossEntropyLoss()
			elif self.errorFunc.upper() == 'BCE':
				criterion = nn.BCELoss()
			elif self.errorFunc.upper() == 'MSE':
				criterion = nn.MSELoss()

			# set optimization function
			optimizer = torch.optim.Adam(self.parameters(),lr=learningRate,amsgrad=True)

			# Load the model to device
			self.to(self.device)

			# Start training
			for epoch in range(numEpochs):
				error=[]
				for i, (trInput, trOutput,L) in enumerate(dataLoader):
				#for i, (trInput, trOutput) in enumerate(dataLoader):
					# Move tensors to the configured device
					trInput = trInput.to(self.device)
					trOutput = trOutput.to(self.device)

					# Forward pass
					outputs = self.forwardPre(trInput)
					loss = criterion(outputs, trOutput)
					
					# Check for regularization
					if self.l1Penalty != 0 or self.l2Penalty != 0:
						l1RegLoss,l2RegLoss = torch.tensor([0.0],requires_grad=True).to(self.device), torch.tensor([0.0],requires_grad=True).to(self.device)
						if self.l1Penalty != 0 and self.l2Penalty == 0:
							for W in self.parameters():
								l1RegLoss += W.norm(1)
							loss = loss + self.l1Penalty * l1RegLoss
						elif self.l1Penalty == 0 and self.l2Penalty != 0:
							for W in self.parameters():
								l2RegLoss += W.norm(2)**2
							loss = loss + 0.5 * self.l2Penalty * l2RegLoss
						elif self.l1Penalty != 0 and self.l2Penalty != 0:
							for W in self.parameters():
								l2RegLoss += W.norm(2)**2
								l1RegLoss += W.norm(1)
							loss = loss + self.l1Penalty * l1RegLoss + 0.5 * self.l2Penalty * l2RegLoss
					
					error.append(loss.item())

					# Backward and optimize
					optimizer.zero_grad()
					loss.backward()
					optimizer.step()

				#self.epochError.append(np.mean(error))
				#if verbose and ((epoch+1) % (numEpochs*0.1)) == 0:
				#	print ('Epoch [{}/{}], Loss: {:.6f}'.format(epoch+1, numEpochs, self.epochError[-1]))
			
			#variable to store pre-trained weight and bias
			if d <len(self.hLayer)-1:
				preW=[]
				preB=[]
				for h in range(len(hidden_layer)):
					preW.append(self.hidden[h].weight)
					preB.append(self.hidden[h].bias)
				preW.append(self.out.weight)
				preB.append(self.out.bias)
			#pdb.set_trace()

		#now set requires_grad =True for all the layers
		for l in range(len(hidden_layer)):			
			self.hidden[l].weight.requires_grad=True			
			self.hidden[l].bias.requires_grad=True
			
		self.out.weight.requires_grad=True
		self.out.bias.requires_grad=True
		
		if verbose:
			print('Pre-training is done.')

	def postTrain(self,dataLoader,learningRate,batchSize,numEpochs,verbose):

		# set device
		#device = self.device
		
		# set loss function
		if self.errorFunc.upper() == 'CE':
			criterion = nn.CrossEntropyLoss()
		elif self.errorFunc.upper() == 'BCE':
			criterion = nn.BCELoss()
		elif self.errorFunc.upper() == 'MSE':
			criterion = nn.MSELoss()

		# set optimization function
		optimizer = torch.optim.Adam(self.parameters(),lr=learningRate,amsgrad=True)
		

		# Load the model to device
		self.to(self.device)
		
		# Start training
		if verbose:
			print('Training network:',self.inputDim,'-->',self.hLayerPost,'-->',self.inputDim)
		for epoch in range(numEpochs):
			error=[]
			#if (epoch+1)%50 == 0 and learningRate > 0.001:
			#	learningRate /= 10
			#	print('Learning rate:',learningRate)
				#decrease learning rate
			#	optimizer = torch.optim.Adam(self.parameters(),lr=learningRate,amsgrad=True)
			#elif (epoch+1)%50 == 0 and learningRate <= 0.001:
			#	learningRate /= 2
			#	print('Learning rate:',learningRate)
				#decrease learning rate
			#	optimizer = torch.optim.Adam(self.parameters(),lr=learningRate,amsgrad=True)
			for i, (trInput, trOutput,L) in enumerate(dataLoader):
			#for i, (trInput, trOutput) in enumerate(dataLoader):
				# Move tensors to the configured device
				trInput = trInput.to(self.device)
				trOutput = trOutput.to(self.device)
				#L = L.to(self.device)

				# Forward pass
				output_en,outputs = self.forwardPost(trInput)
				loss = criterion(outputs, trOutput)
				#if torch.any(torch.isnan(output_en)):
				#	pdb.set_trace()
				#pdb.set_trace()
				if self.bottleneckCons:
					#calculate centroids of each class using bottleneck activation
					ceOutput_en = self.createEncoderOutputAsCentroids(output_en,L)
					#calculate CE loss with bottleneck output
					ceLoss = criterion(output_en,ceOutput_en)
					separationLoss = self.calcInterClassDistances(output_en,L,criterion)
					loss = loss + self.Lambda1*ceLoss + self.Lambda2*separationLoss
					#loss = loss + 0.001*ceLoss + 0.001*separationLoss
					
				# Check for regularization
				if self.l1Penalty != 0 or self.l2Penalty != 0:
					l1RegLoss,l2RegLoss = torch.tensor([0.0],requires_grad=True).to(self.device), torch.tensor([0.0],requires_grad=True).to(self.device)
					if self.l1Penalty != 0 and self.l2Penalty == 0:
						for W in self.parameters():
							l1RegLoss += W.norm(1)
						loss = loss + self.l1Penalty * l1RegLoss
					elif self.l1Penalty == 0 and self.l2Penalty != 0:
						for W in self.parameters():
							l2RegLoss += W.norm(2)**2
						loss = loss + 0.5 * self.l2Penalty * l2RegLoss
					elif self.l1Penalty != 0 and self.l2Penalty != 0:
						for W in self.parameters():
							l2RegLoss += W.norm(2)**2
							l1RegLoss += W.norm(1)
						loss = loss + self.l1Penalty * l1RegLoss + 0.5 * self.l2Penalty * l2RegLoss
				
				error.append(loss.item())

				# Backward and optimize
				optimizer.zero_grad()
				loss.backward()
				optimizer.step()

			self.epochError.append(np.mean(error))
			if verbose and ((epoch+1) % (numEpochs*0.25)) == 0:
				print ('Epoch [{}/{}], Loss: {:.6f}'.format(epoch+1, numEpochs, self.epochError[-1]))


	def fit(self,trData,trLabels,preTraining=True,learningRate=0.001,miniBatchSize=100,numEpochsPreTrn=25,
		numEpochsPostTrn=100,standardizeFlag=True,cudaDeviceId=0,multiClass=False,clusterList=[],maxItrClustering=500,verbose=True):

		# set device
		self.device = torch.device('cuda:'+str(cudaDeviceId))
		self.nClass = len(np.unique(trLabels))
		if standardizeFlag:
		#standardize data
			mu,sd,trData = standardizeData(trData)
			self.trMu=mu
			self.trSd=sd

		#create target: centroid for each class
		if not(multiClass):
			target=self.createOutputAsCentroids(trData,trLabels)
			#target=trData#for autoencoder training
			#neighInput,neighTarget = self.createNeighInputTarget(trData,trLabels)
		else:
			if clusterList == []: #when clusterList is empty assign 5 centers per class(default)
				clusterList = 2*np.ones(self.nClass).astype(int)
			#sorting the data by labels. Labels are numeric starting from 0
			#This is an important step otherwise wrong center will be assigned to each data after kMean
			trData,trLabels = self.sortDataOnLabel(trData,trLabels)
			target = self.createOutputWithMultiCenter(trData,trLabels,clusterList,maxItrClustering)
		#pdb.set_trace()
		#Prepare data for torch
		#trDataTorch=Data.TensorDataset(torch.from_numpy(trData).float(),torch.from_numpy(target).float())
		trDataTorch = Data.TensorDataset(torch.from_numpy(trData).float(),torch.from_numpy(target).float(),torch.from_numpy(trLabels))
		#trDataTorch = Data.TensorDataset(torch.from_numpy(neighInput).float(),torch.from_numpy(neighTarget).float())
		dataLoader = Data.DataLoader(dataset=trDataTorch,batch_size=miniBatchSize,shuffle=True)

		#layer-wise pre-training
		if preTraining:
			self.preTrain(dataLoader,learningRate,miniBatchSize,numEpochsPreTrn,verbose)
		else:
			#initialize the network weight and bias
			self.initNet(self.inputDim,self.hLayerPost)
		#post training
		self.postTrain(dataLoader,learningRate,miniBatchSize,numEpochsPostTrn,verbose)
		
	def predict(self,x):
		if len(self.trMu) != 0 and len(self.trSd) != 0:#standarization has been applied on training data so apply on test data
			x = standardizeData(x,self.trMu,self.trSd)
		
		x=torch.from_numpy(x).float().to('cpu')
		fOut=[x]
		with torch.no_grad():#we don't need to compute gradients (for memory efficiency)
			for l in range(len(self.hidden)):
				if self.hActFuncPost[l].upper()=='SIGMOID':
					fOut.append(torch.sigmoid(self.hidden[l](fOut[-1])))
				elif self.hActFuncPost[l].upper()=='TANH':
					fOut.append(torch.tanh(self.hidden[l](fOut[-1])))
				elif self.hActFuncPost[l].upper()=='RELU':
					fOut.append(torch.relu(self.hidden[l](fOut[-1])))
				else:#default is linear				
					fOut.append(self.hidden[l](fOut[-1]))
				if self.dropoutFlag: #dropout was applied during training, so adjuct the activation in test
					fOut[-1] = (1-self.dropoutRate[l])*fOut[-1]

			if self.oActFunc.upper()=='SIGMOID':
				fOut.append(torch.sigmoid(self.out(fOut[-1])))
			else:
				fOut.append(self.out(fOut[-1]))

		return fOut
