#' Kullback-Leibler divergence between two multivariate normal distributions
#'
#' Function calculating the Kullback-Leibler divergence between two
#' multivariate normal distributions.
#'
#' @details
#' The Kullback-Leibler (KL) information (Kullback and Leibler, 1951; also known
#' as relative entropy) is a measure of divergence between two probability
#' distributions. Typically, one distribution is taken to represent the `true'
#' distribution and functions as the reference distribution while the other is
#' taken to be an approximation of the true distribution. The criterion then
#' measures the loss of information in approximating the reference distribution.
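#' The KL divergence between two \eqn{p}-dimensional multivariate normal
#' distributions, the true/reference distribution
#'   \eqn{\mathcal{N}^{0}_{p}(\boldsymbol{\mu}_{0}, \mathbf{\Sigma}_{0})}
#' (arguments \code{Mref} and \code{Sref}) and the approximating distribution
#'   \eqn{\mathcal{N}^{1}_{p}(\boldsymbol{\mu}_{1}, \mathbf{\Sigma}_{1})}
#' (arguments \code{Mtest} and \code{Stest}),
#' is given as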
#'   \deqn{
#'     \mathrm{I}_{KL}(\mathcal{N}^{0}_{p} \| \mathcal{N}^{1}_{p}) =
#'      \frac{1}{2}\left\{\mathrm{tr}(\mathbf{\Omega}_{1}\mathbf{\Sigma}_{0})
#'      + (\boldsymbol{\mu}_{1} - \boldsymbol{\mu}_{0})^{\mathrm{T}}
#'      \mathbf{\Omega}_{1}(\boldsymbol{\mu}_{1} - \boldsymbol{\mu}_{0}) - p
#'      - \ln|\mathbf{\Sigma}_{0}| + \ln|\mathbf{\Sigma}_{1}| \right\},
#'   }
#' where \eqn{\mathbf{\Omega} = \mathbf{\Sigma}^{-1}}. The KL divergence is not
#' a proper metric as \eqn{\mathrm{I}_{KL}(\mathcal{N}^{0}_{p} \|
#' \mathcal{N}^{1}_{p}) \neq \mathrm{I}_{KL}(\mathcal{N}^{1}_{p} \|
#' \mathcal{N}^{0}_{p})}. When \code{symmetric = TRUE} the function calculates
#' the symmetric KL divergence (also referred to as Jeffreys information), given
#' as
#'   \deqn{
#'     \mathrm{I}_{KL}(\mathcal{N}^{0}_{p} \| \mathcal{N}^{1}_{p}) +
#'     \mathrm{I}_{KL}(\mathcal{N}^{1}_{p} \| \mathcal{N}^{0}_{p}).
#'   }
#'
#' @param Mtest A \code{numeric} mean vector for the approximating multivariate
#' normal distribution.
#' @param Mref A \code{numeric} mean vector for the true/reference multivariate
#' normal distribution.
#' @param Stest A covariance \code{matrix} for the approximating multivariate
#' normal distribution.
#' @param Sref A covariance \code{matrix} for the true/reference multivariate
#' normal distribution.
#' @param symmetric A \code{logical} indicating if the symmetric version of
#' Kullback-Leibler divergence should be calculated.
#' @return Function returns a \code{numeric} representing the (symmetric)
#' Kullback-Leibler divergence.
#' @author Wessel N. van Wieringen, Carel F.W. Peeters <carel.peeters@@wur.nl>
#' @seealso \code{\link{covML}}, \code{\link{ridgeP}}
#' @references Kullback, S. and Leibler, R.A. (1951). On Information and
#' Sufficiency. Annals of Mathematical Statistics 22: 79-86.
#' @examples
#'
#' ## Define population
#' set.seed(333)
#' p = 25
#' n = 1000
#' X = matrix(rnorm(n*p), nrow = n, ncol = p)
#' colnames(X)[1:25] = letters[1:25]
#' Cov0  <- covML(X)
#' mean0 <- colMeans(X)
#'
#' ## Obtain sample from population
#' samples <- X[sample(nrow(X), 10),]
#' Cov1  <- covML(samples)
#' mean1 <- colMeans(samples)
#'
#' ## Regularize singular Cov1
#' P <- ridgeP(Cov1, 10)
#' CovR <- solve(P)
#'
#' ## Obtain KL divergence
#' KLdiv(mean1, mean0, CovR, Cov0)
#'
#' @export
KLdiv <- function(Mtest, Mref, Stest, Sref, symmetric = FALSE){
  # Input validation: each guard stops with its own message, checked in order
  if (!inherits(Mtest, "numeric")){
    stop("Input (Mtest) is of wrong class")
  }
  if (!inherits(Mref, "numeric")){
    stop("Input (Mref) is of wrong class")
  }
  if (length(Mtest) != length(Mref)){
    stop("Mtest and Mref should be of same length")
  }
  if (!is.matrix(Stest)){
    stop("Input (Stest) is of wrong class")
  }
  if (!is.matrix(Sref)){
    stop("Input (Sref) is of wrong class")
  }
  if (!isSymmetric(Stest)){
    stop("Stest should be symmetric")
  }
  if (!isSymmetric(Sref)){
    stop("Sref should be symmetric")
  }
  if (dim(Stest)[1] != length(Mtest)){
    stop("Column and row dimension of Stest should correspond to length Mtest")
  }
  if (dim(Sref)[1] != length(Mref)){
    stop("Column and row dimension of Sref should correspond to length Mref")
  }
  if (!inherits(symmetric, "logical")){
    stop("Input (symmetric) is of wrong class")
  }
  if (length(symmetric) != 1L || is.na(symmetric)){
    # Otherwise `if (symmetric)` below fails with an uninformative error
    stop("Input (symmetric) should be a single TRUE or FALSE")
  }

  # Log-determinant obtained directly on the log scale. log(det(S)) first
  # exponentiates, which under-/overflows to 0/Inf for moderately large
  # dimensions and then turns the divergence into NaN or Inf.
  logDet <- function(S){
    ld <- determinant(S, logarithm = TRUE)
    if (ld$sign > 0){
      as.numeric(ld$modulus)
    } else {
      # Non-positive determinant (S is not positive definite): keep the
      # behaviour of the plain expression, i.e. NaN accompanied by a warning
      log(det(S))
    }
  }

  # Quantities shared by both directions of the divergence
  p      <- nrow(Sref)
  dMean  <- Mtest - Mref
  Ptest  <- solve(Stest)
  ldDiff <- logDet(Stest) - logDet(Sref)

  # Evaluate KL divergence.
  # tr(A %*% B) equals sum(A * t(B)), which avoids forming the full product.
  KLd <- (sum(Ptest * t(Sref)) +
            drop(crossprod(dMean, Ptest %*% dMean)) -
            p + ldDiff)/2

  # Evaluate (original) symmetric version KL divergence: add the divergence
  # with the roles of the two distributions interchanged. The quadratic form
  # is unaffected by the sign of the mean difference; the log-determinant
  # difference flips sign.
  if (symmetric){
    Pref <- solve(Sref)
    KLd <- KLd + (sum(Pref * t(Stest)) +
                    drop(crossprod(dMean, Pref %*% dMean)) -
                    p - ldDiff)/2
  }

  # Return
  return(as.numeric(KLd))
}