/**
File:		MachineLearning/GPModels/Models/GPModels/FgAEPSparseDGPLVM.h

Author:		
Email:		
Site:       

Copyright (c) 2020 . All rights reserved.
*/

#pragma once

#include <MachineLearning/FgSparseDeepGPSSMBaseModel.h>

namespace NeuralEngine
{
	namespace MachineLearning
	{
		namespace GPModels
		{
			namespace AEP
			{

				////////////////////////////////////////////////////////////////////////////////////////////////////
				/// <summary> Sparse deep GPSSM  via Approximated Expectation Propagation (AEP). </summary>
				///
				/// <remarks>
				///		<para>
				/// 		Instead of taking one Gaussian portion out to form the cavity, we take out a
				/// 		fraction defined by the parameter \f$\alpha\f$, which can also be seen as a ratio parameter
				/// 		between VFE and PowerEp with FITC approximation. This enables deep structures for GPLVM.
				/// 	</para>
				/// 	<para>	
				/// 		GPLVMs are the nonlinear dual version of probabilistic PCA, where a low dimensional
				/// 		latent variable \f$\mathbf{X}=[\mathbf{x}_1,...,\mathbf{x}_N]^T\f$ is mapped onto a
				/// 		high dimensional data variable \f$\mathbf{Y}=[\mathbf{y}_1,...,\mathbf{y}_N]^T\f$ via a
				/// 		prior mapping function \f$f(\mathbf{x})\f$. The difference to normal GPs is the uncertainty
				/// 		of \f$\mathbf{X}\f$, which is initialized via PCA and optimized during learning.
				/// 		To avoid memory issues for larger data sets, the algorithm makes use of sparse approximation
				/// 		techniques.
				///		</para>
				///		<para>
				/// 		Sparse approximations are used for larger
				/// 		data sets to reduce memory size and computational complexity.  This is
				/// 		done by introducing a subset of inducing points or pseudo inputs to approximate
				/// 		the full set. The inversion of the kernel matrix depends only on those points
				/// 		and reduces the computational complexity from \f$O(N^3)\f$ to \f$O(k^2N)\f$, where
				/// 		\f$k\f$ is the number of inducing points and \f$N\f$ the length of the data set.
				///		</para>
				/// 	<para>
				///			References:
				///			<list type="bullet">
				///			<item>
				///			  	  <description><a href="http://mlg.eng.cam.ac.uk/thang/docs/papers/thesis-thang.pdf" target="_blank">
				///					 Bui, T. D. (2018). Efficient Deterministic Approximate Bayesian Inference for Gaussian 
				///					 Process models (Doctoral thesis). https://doi.org/10.17863/CAM.20913  </a>
				///			     </description>
				///			  </item>
				///			</list>
				///		</para>
				/// 	
				/// 	
				/// 	, 24.11.2019. 
				/// </remarks>
				////////////////////////////////////////////////////////////////////////////////////////////////////
				template<typename Scalar>
				class NE_IMPEXP SDGPSSM : public SparseDeepGPSSMBaseModel<Scalar>
				{
				public:
					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Constructor taking the description of a single hidden layer. </summary>
					///
					/// <remarks>	, 12.06.2018. </remarks>
					///
					/// <param name="Y">			  	The training data. </param>
					/// <param name="latentDimension">	The latent dimension (presumably the dimensionality of the
					/// 								latent variable X; confirm against the implementation). </param>
					/// <param name="description">	  	The description for one hidden layer. </param>
					/// <param name="alpha">		  	(Optional) The alpha (fraction parameter). Defaults to 1.0. </param>
					/// <param name="priorMean">	  	(Optional) The prior mean. Defaults to 0.0. </param>
					/// <param name="priorVariance">  	(Optional) The prior variance. Defaults to 1.0. </param>
					/// <param name="xControl">		  	(Optional) Presumably the control inputs; defaults to an empty
					/// 								af::array. TODO confirm semantics. </param>
					/// <param name="probMode">		  	(Optional) The propagation mode. Defaults to
					/// 								PropagationMode::MomentMatching. </param>
					/// <param name="lType">		  	(Optional) likelihood type. Defaults to LogLikType::Gaussian. </param>
					/// <param name="emethod">		  	(Optional) The XInit method (presumably how the latent variable
					/// 								is initialized). Defaults to XInit::pca. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPSSM(const af::array& Y, int latentDimension, HiddenLayerDescription description, Scalar alpha = 1.0,
						Scalar priorMean = 0.0, Scalar priorVariance = 1.0, af::array xControl = af::array(), 
						PropagationMode probMode = PropagationMode::MomentMatching, LogLikType lType = LogLikType::Gaussian, XInit emethod = XInit::pca);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Constructor taking a list of hidden layer descriptions. </summary>
					///
					/// <remarks>	, 26.03.2018. </remarks>
					///
					/// <param name="Y">			  	The data af::array to process. </param>
					/// <param name="latentDimension">	The latent dimension (presumably the dimensionality of the
					/// 								latent variable X; confirm against the implementation). </param>
					/// <param name="descriptions">	  	The hidden layer descriptions. </param>
					/// <param name="alpha">		  	(Optional) The alpha (fraction parameter). Defaults to 1.0. </param>
					/// <param name="priorMean">	  	(Optional) The prior mean. Defaults to 0.0. </param>
					/// <param name="priorVariance">  	(Optional) The prior variance. Defaults to 1.0. </param>
					/// <param name="xControl">		  	(Optional) Presumably the control inputs; defaults to an empty
					/// 								af::array. TODO confirm semantics. </param>
					/// <param name="probMode">		  	(Optional) The propagation mode. Defaults to
					/// 								PropagationMode::MomentMatching. </param>
					/// <param name="lType">		  	(Optional) the loglik type. Defaults to LogLikType::Gaussian. </param>
					/// <param name="emethod">		  	(Optional) The XInit method (presumably how the latent variable
					/// 								is initialized). Defaults to XInit::pca. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPSSM(const af::array& Y, int latentDimension, std::vector<HiddenLayerDescription> descriptions, Scalar alpha = 1.0,
						Scalar priorMean = 0.0, Scalar priorVariance = 1.0, af::array xControl = af::array(),
						PropagationMode probMode = PropagationMode::MomentMatching, LogLikType lType = LogLikType::Gaussian, XInit emethod = XInit::pca);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Default Constructor. </summary>
					///
					/// <remarks>	Hmetal T, 29.11.2017. </remarks>
					///
					////////////////////////////////////////////////////////////////////////////////////////////////////
					SDGPSSM();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Destructor. </summary>
					///
					/// <remarks>	, 23.04.2018. </remarks>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual ~SDGPSSM();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Evaluates the cost function for the given parameter inputs. </summary>
					///
					/// <remarks>	Hmetal T, 29.11.2017. </remarks>
					///
					/// <param name="x">		  	The parameters to be optimized. </param>
					/// <param name="outGradient">	[in,out] The out gradient. </param>
					///
					/// <returns>	A Scalar (presumably the cost value at x). </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual Scalar Function(const af::array& x, af::array& outGradient) override;

				protected:

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Computes the cavity distribution. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <param name="mcav">	[in,out] The mcav (presumably the cavity mean). </param>
					/// <param name="vcav">	[in,out] The vcav (presumably the cavity variance). </param>
					/// <param name="cav1">	[in,out] The first cav. </param>
					/// <param name="cav2">	[in,out] The second cav. </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual void CavityLatents(af::array& mcav, af::array& vcav, af::array& cav1, af::array& cav2);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Calculates the tilted transition. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <param name="mprob">	   	The mprob. </param>
					/// <param name="vprob">	   	The vprob. </param>
					/// <param name="mcav_t1">	   	The first mcav t. </param>
					/// <param name="vcav_t1">	   	The first vcav t. </param>
					/// <param name="scaleLogZDyn">	The scale log z coordinate dynamic. </param>
					/// <param name="dlogZ_dmProb">	[in,out] Presumably the derivative of logZ w.r.t. mprob. </param>
					/// <param name="dlogZ_dvProb">	[in,out] Presumably the derivative of logZ w.r.t. vprob. </param>
					/// <param name="dlogZ_dmt">   	[in,out] Presumably the derivative of logZ w.r.t. mcav_t1. </param>
					/// <param name="dlogZ_dvt">   	[in,out] Presumably the derivative of logZ w.r.t. vcav_t1. </param>
					/// <param name="dlogZ_sn">	   	[in,out] Presumably the derivative of logZ w.r.t. the noise
					/// 							parameter sn. TODO confirm against the implementation. </param>
					///
					/// <returns>	The calculated tilted transition. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					Scalar ComputeTiltedTransition(const af::array& mprob, const af::array& vprob, const af::array& mcav_t1, const af::array& vcav_t1,
						Scalar scaleLogZDyn, af::array& dlogZ_dmProb, af::array& dlogZ_dvProb, af::array& dlogZ_dmt, af::array& dlogZ_dvt, Scalar& dlogZ_sn);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Posterior gradient w.r.t \f$\mathbf{X}\f$. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <returns>	An af::array. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual af::array PosteriorGradientLatents();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Cavity gradient w.r.t \f$\mathbf{X}\f$. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <param name="cav1">	The first cav. </param>
					/// <param name="cav2">	The second cav. </param>
					///
					/// <returns>	An af::array. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual af::array CavityGradientLatents(const af::array& cav1, const af::array& cav2);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	LogZ gradient w.r.t \f$\mathbf{X}\f$. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <param name="cav1">		 	The first cav. </param>
					/// <param name="cav2">		 	The second cav. </param>
					/// <param name="dmcav_up">  	The dmcav up. </param>
					/// <param name="dvcav_up">  	The dvcav up. </param>
					/// <param name="dmcav_prev">	The dmcav previous. </param>
					/// <param name="dvcav_prev">	The dvcav previous. </param>
					/// <param name="dmcav_next">	The dmcav next. </param>
					/// <param name="dvcav_next">	The dvcav next. </param>
					///
					/// <returns>	An af::array. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual af::array LogZGradientLatents(const af::array& cav1, const af::array& cav2, const af::array& dmcav_up, const af::array& dvcav_up,
						const af::array& dmcav_prev, const af::array& dvcav_prev, const af::array& dmcav_next, const af::array& dvcav_next);

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Calculates the phi prior. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <returns>	The calculated phi prior latents. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual Scalar ComputePhiPriorLatents();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Calculates the phi cavity. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <returns>	The calculated phi cavity latents. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual Scalar  ComputePhiCavityLatents();

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>	Calculates the phi posterior. </summary>
					///
					/// <remarks>	Hmetal T, 11/05/2020. </remarks>
					///
					/// <returns>	The calculated phi posterior latents. </returns>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					virtual Scalar  ComputePhiPosteriorLatents();

				private:
					Scalar dAlpha;	//!< fraction parameter

					// Grants Boost.Serialization access to the private serialize() member below.
					friend class boost::serialization::access;

					////////////////////////////////////////////////////////////////////////////////////////////////////
					/// <summary>
					/// 	Serializes this object: first the SparseDeepGPSSMBaseModel base-class part, then dAlpha.
					/// </summary>
					///
					/// <param name="ar">	  	[in,out] The archive. </param>
					/// <param name="version">	The version (not used in this function). </param>
					////////////////////////////////////////////////////////////////////////////////////////////////////
					template<class Archive>
					void serialize(Archive& ar, unsigned int version)
					{
						ar& boost::serialization::base_object<SparseDeepGPSSMBaseModel<Scalar>>(*this);
						//ar& boost::serialization::make_nvp("SparseDeepGPLVMBaseModel", boost::serialization::base_object<SparseDeepGPLVMBaseModel<Scalar>>(*this));
						ar& BOOST_SERIALIZATION_NVP(dAlpha);
					}
				};
			}
		}
	}
}
/** @example AEP_SDGPSSM_Examples.cpp */
