/**
File:		MachineLearning/GPModels/Models/GPModels/FgAEPSparseDGPLVM.h

Author:		
Email:		
Site:       

Copyright (c) 2020 . All rights reserved.
*/

#pragma once

#include <MachineLearning/FgSparseDeepGPLVMBaseModel.h>

namespace NeuralEngine
{
	namespace MachineLearning
	{
		namespace GPModels
		{
			namespace AEP
			{

				////////////////////////////////////////////////////////////////////////////////////////////////////
				/// <summary> Sparse deep GPLVM  via Approximated Expectation Propagation (AEP). </summary>
				///
				/// <remarks>
				///		<para>
				/// 		Instead of taking one whole Gaussian factor out to form the cavity, only a
				/// 		fraction of it is removed, defined by the parameter \f$\alpha\f$. This parameter can
				/// 		also be seen as a ratio between VFE and Power EP with the FITC approximation.
				/// 		This enables deep structures for GPLVM.
				/// 	</para>
				/// 	<para>	
				/// 		GPLVMs are the nonlinear dual version of probabilistic PCA, where a low dimensional 
				/// 		latent variable \f$\mathbf{X}=[\mathbf{x}_1,...,\mathbf{x}_N]^T\f$ is mapped onto a
				/// 		high dimensional data variable \f$\mathbf{Y}=[\mathbf{y}_1,...,\mathbf{y}_N]^T\f$ via a
				/// 		prior mapping function \f$f(\mathbf{x})\f$. The difference to normal GPs is the uncertainty
				/// 		of \f$\mathbf{X}\f$, which is initialized via PCA and optimized during learning.
				/// 		To avoid memory issues for larger data sets, the algorithm makes use of sparse approximation
				/// 		techniques.
				///		</para>
				///		<para>
				/// 		Sparse approximations are used for larger
				/// 		data sets to reduce memory size and computational complexity. This is
				/// 		done by introducing a subset of inducing points or pseudo inputs to approximate
				/// 		the full set. The inversion of the kernel matrix depends only on those points,
				/// 		which reduces the computational complexity from \f$O(N^3)\f$ to \f$O(k^2N)\f$, where
				/// 		\f$k\f$ is the number of inducing points and \f$N\f$ the size of the data set.
				///		</para>
				/// 	<para>
				///			References:
				///			<list type="bullet">
				///			  <item>
				///			  	  <description><a href="http://mlg.eng.cam.ac.uk/thang/docs/papers/thesis-thang.pdf" target="_blank">
				///					 Bui, T. D. (2018). Efficient Deterministic Approximate Bayesian Inference for Gaussian 
				///					 Process models (Doctoral thesis). https://doi.org/10.17863/CAM.20913  </a>
				///			     </description>
				///			  </item>
				///			</list>
				///		</para>
				/// 	
				/// 	
				/// 	, 24.11.2019. 
				/// </remarks>
				////////////////////////////////////////////////////////////////////////////////////////////////////
				template<typename Scalar>
				class NE_IMPEXP SDGPLVM : public SparseDeepGPLVMBaseModel<Scalar>
				{
				public:
					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Constructor taking the description of a single hidden layer. </summary>
					///
					/// <remarks>	, 12.06.2018. </remarks>
					///
					/// <param name="Y">			 	The training data. </param>
					/// <param name="latentDimension">	Dimension of the latent space (presumably the number of columns
					/// 								of the latent variable X; confirm against the implementation). </param>
					/// <param name="description">	 	The description for one hidden layer. </param>
					/// <param name="alpha">		 	(Optional) The alpha (fraction parameter). Defaults to 1.0. </param>
					/// <param name="priorMean">	 	(Optional) The prior mean, presumably of the latent points (TODO
					/// 								confirm). Defaults to 0.0. </param>
					/// <param name="priorVariance"> 	(Optional) The prior variance, presumably of the latent points (TODO
					/// 								confirm). Defaults to 1.0. </param>
					/// <param name="probMode">		 	(Optional) The propagation mode. Defaults to
					/// 								PropagationMode::MomentMatching. </param>
					/// <param name="lType">		 	(Optional) The likelihood type. Defaults to LogLikType::Gaussian. </param>
					/// <param name="emethod">		 	(Optional) The method used to initialize X. Defaults to XInit::pca. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPLVM(const af::array& Y, int latentDimension, HiddenLayerDescription description, Scalar alpha = 1.0,
						Scalar priorMean = 0.0, Scalar priorVariance = 1.0, PropagationMode probMode = PropagationMode::MomentMatching,
						LogLikType lType = LogLikType::Gaussian, XInit emethod = XInit::pca);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Constructor taking a list of hidden layer descriptions. </summary>
					///
					/// <remarks>	, 26.03.2018. </remarks>
					///
					/// <param name="Y">			 	The training data. </param>
					/// <param name="latentDimension">	Dimension of the latent space (presumably the number of columns
					/// 								of the latent variable X; confirm against the implementation). </param>
					/// <param name="descriptions">	 	The hidden layer descriptions. </param>
					/// <param name="alpha">		 	(Optional) The alpha (fraction parameter). Defaults to 1.0. </param>
					/// <param name="priorMean">	 	(Optional) The prior mean, presumably of the latent points (TODO
					/// 								confirm). Defaults to 0.0. </param>
					/// <param name="priorVariance"> 	(Optional) The prior variance, presumably of the latent points (TODO
					/// 								confirm). Defaults to 1.0. </param>
					/// <param name="probMode">		 	(Optional) The propagation mode. Defaults to
					/// 								PropagationMode::MomentMatching. </param>
					/// <param name="lType">		 	(Optional) The likelihood type. Defaults to LogLikType::Gaussian. </param>
					/// <param name="emethod">		 	(Optional) The method used to initialize X. Defaults to XInit::pca. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPLVM(const af::array& Y, int latentDimension, std::vector<HiddenLayerDescription> descriptions, Scalar alpha = 1.0,
						Scalar priorMean = 0.0, Scalar priorVariance = 1.0, PropagationMode probMode = PropagationMode::MomentMatching,
						LogLikType lType = LogLikType::Gaussian, XInit emethod = XInit::pca);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Default constructor. </summary>
					///
					/// <remarks>
					/// 	Hmetal T, 29.11.2017.
					/// 	NOTE(review): presumably required so that an instance can be created before being
					/// 	filled by serialization; confirm against the callers.
					/// </remarks>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPLVM();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Destructor. </summary>
					///
					/// <remarks>	, 23.04.2018. </remarks>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual ~SDGPLVM();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Evaluates the cost function for the given parameter inputs. </summary>
					///
					/// <remarks>	Hmetal T, 29.11.2017. Overrides the base class implementation. </remarks>
					///
					/// <param name="x">		  	The parameters to be optimized. </param>
					/// <param name="outGradient">	[in,out] The out gradient (presumably the gradient of the cost
					/// 							w.r.t. x; implementation not visible in this header). </param>
					///
					/// <returns>	The cost value as a Scalar. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual Scalar Function(const af::array& x, af::array& outGradient) override;

				protected:

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Computes the cavity distribution of the latent points. </summary>
					///
					/// <remarks>	Hmetal T, 09/12/2019. </remarks>
					///
					/// <param name="mx">	[in,out] The mean of the latent points. </param>
					/// <param name="vx">	[in,out] The variance of the latent points. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					void CavityLatents(af::array& mx, af::array& vx);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Gradient of the cavity distribution w.r.t. latent points. </summary>
					///
					/// <remarks>	HmetalT, 09/12/2019. </remarks>
					///
					/// <param name="dmx">	The gradient of the mean. </param>
					/// <param name="dvx">	The gradient of the variance. </param>
					/// <param name="m">  	An af::array to process (presumably a mean; TODO confirm against the
					/// 					implementation). </param>
					/// <param name="v">  	An af::array to process (presumably a variance; TODO confirm against the
					/// 					implementation). </param>
					///
					/// <returns>	The gradient as an af::array. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					af::array CavityGradientLatents(const af::array& dmx, const af::array& dvx, const af::array& m, const af::array& v);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Calculates the likelihood contribution of the latent points and its gradients. </summary>
					///
					/// <remarks>	Hmetal T, 09/12/2019. </remarks>
					///
					/// <param name="mx"> 	The mean. </param>
					/// <param name="vx"> 	The variance. </param>
					/// <param name="dmx">	[in,out] (Optional) Pointer to the gradient of the mean. Defaults to nullptr. </param>
					/// <param name="dvx">	[in,out] (Optional) Pointer to the gradient of the variance. Defaults to nullptr. </param>
					///
					/// <returns>	The calculated phi contribution. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					Scalar ComputePhiLatents(const af::array& mx, const af::array& vx, af::array* dmx = nullptr, af::array* dvx = nullptr);

				private:
					Scalar dAlpha;	//!< fraction parameter (alpha)
					PropagationMode pMode;	//!< propagation mode (presumably set from the constructors' probMode; confirm in the implementation)

					// Grants Boost.Serialization access to the private serialize() member below.
					friend class boost::serialization::access;

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Serializes the base class part followed by the members of this class. </summary>
					///
					/// <remarks>
					/// 	The base class sub-object is written/read first, then dAlpha and pMode as
					/// 	name-value pairs. The version argument is not used in this body.
					/// </remarks>
					///
					/// <param name="ar">	  	[in,out] The archive. </param>
					/// <param name="version">	The class version (unused here). </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					template<class Archive>
					void serialize(Archive& ar, unsigned int version)
					{
						ar& boost::serialization::base_object<SparseDeepGPLVMBaseModel<Scalar>>(*this);
						// Alternative form wrapping the base object in an explicitly named NVP (kept for reference, disabled):
						//ar& boost::serialization::make_nvp("SparseDeepGPLVMBaseModel", boost::serialization::base_object<SparseDeepGPLVMBaseModel<Scalar>>(*this));
						ar& BOOST_SERIALIZATION_NVP(dAlpha);
						ar& BOOST_SERIALIZATION_NVP(pMode);
					}
				};
			}
		}
	}
}
/** @example AEP_SDGPLVM_Examples.cpp */