#' @title Create density functions from raw data, histogram objects or frequency tables with bins
#'
#' @description Create kernel density estimate along the support of the raw data using the HADES method.
#'
#' @param y A vector of raw readings.
#' @param histogram A \code{histogram} object in R. Use this option when histogram object is only available, but not the raw data \code{y}. The default is \code{NULL}.
#' @param freq A frequency vector. Use this option when frequency table is only available, but not the raw sample or the histogram object. The corresponding \code{bin} should be provided together. The default is \code{NULL}.
#' @param bin A bin vector having its length with \code{length(freq)+1}. Use this option when frequency table is only available, but not the raw sample or the histogram object. The corresponding \code{freq} should be provided together.The default is \code{NULL}.
#' @param optns A list of options control parameters specified by \code{list(name=value)}. See `Details'.
#' @details Available control options are
#' \describe{
#' \item{userBwMu}{The bandwidth value for the smoothed mean function; positive numeric - default: determine automatically based on the data-driven bandwidth selector proposed by Sheather and Jones (1991)}
#' \item{nRegGrid}{The number of support points the KDE; numeric - default: 101.}
#' \item{delta}{The size of the bin to be used; numeric - default: \code{diff(range(y))/1000}. It only works when the raw sample is available.}
#' \item{kernel}{smoothing kernel choice, \code{"rect"}, \code{"gauss"}, \code{"epan"}, \code{"gausvar"}, \code{"quar"} - default: \code{"gauss"}.}
#' \item{infSupport}{logical if we expect the distribution to have infinite support or not; logical - default: \code{FALSE}.}
#' \item{outputGrid}{User defined output grid for the support of the KDE, it overrides \code{nRegGrid}; numeric - default: \code{NULL}.}
#' }
#'
#' @return A list containing the following fields:
#' \item{bw}{The bandwidth used for smoothing.}
#' \item{x}{A vector of length \code{nRegGrid} with the values of the KDE's support points.}
#' \item{y}{A vector of length \code{nRegGrid} with the values of the KDE at the support points.}
#'
#' @examples
#'
#' ### compact support case
#'
#' # input: raw sample
#' set.seed(100)
#' n <- 100
#' x0 <- seq(0, 1, length.out = 51)
#' Y <- rbeta(n, 3, 2)
#' f1 <- CreateDensity(y = Y, optns = list(outputGrid = x0))
#'
#' # input: histogram
#' histY <- hist(Y)
#' f2 <- CreateDensity(histogram = histY, optns = list(outputGrid = x0))
#'
#' # input: frequency table with unequally spaced (random) bins
#' binY <- c(0, sort(runif(9)), 1)
#' freqY <- c()
#' for (i in 1:(length(binY) - 1)) {
#'   freqY[i] <- length(which(Y > binY[i] & Y <= binY[i + 1]))
#' }
#' f3 <- CreateDensity(freq = freqY, bin = binY, optns = list(outputGrid = x0))
#'
#' # plot
#' plot(f1$x, f1$y,
#'   type = "l", col = 2, lty = 2, lwd = 2,
#'   xlim = c(0, 1), ylim = c(0, 2), xlab = "domain", ylab = "density"
#' )
#' points(f2$x, f2$y, type = "l", col = 3, lty = 3, lwd = 2)
#' points(f3$x, f3$y, type = "l", col = 4, lty = 4, lwd = 2)
#' points(x0, dbeta(x0, 3, 2), type = "l", lwd = 2)
#' legend("topleft",
#'   c("true", "raw sample", "histogram", "frequency table (unequal bin)"),
#'   col = 1:4, lty = 1:4, lwd = 3, bty = "n"
#' )
#'
#' ### infinite support case
#'
#' # input: raw sample
#' set.seed(100)
#' n <- 200
#' x0 <- seq(-3, 3, length.out = 101)
#' Y <- rnorm(n)
#' f1 <- CreateDensity(y = Y, optns = list(outputGrid = x0))
#'
#' # input: histogram
#' histY <- hist(Y)
#' f2 <- CreateDensity(histogram = histY, optns = list(outputGrid = x0))
#'
#' # input: frequency table with unequally spaced (random) bins
#' binY <- c(-3, sort(runif(9, -3, 3)), 3)
#' freqY <- c()
#' for (i in 1:(length(binY) - 1)) {
#'   freqY[i] <- length(which(Y > binY[i] & Y <= binY[i + 1]))
#' }
#' f3 <- CreateDensity(freq = freqY, bin = binY, optns = list(outputGrid = x0))
#'
#' # plot
#' plot(f1$x, f1$y,
#'   type = "l", col = 2, lty = 2, lwd = 2,
#'   xlim = c(-3, 3), ylim = c(0, 0.5), xlab = "domain", ylab = "density"
#' )
#' points(f2$x, f2$y, type = "l", col = 3, lty = 3, lwd = 2)
#' points(f3$x, f3$y, type = "l", col = 4, lty = 4, lwd = 2)
#' points(x0, dnorm(x0), type = "l", lwd = 2)
#' legend("topright",
#'   c("true", "raw sample", "histogram", "frequency table (unequal bin)"),
#'   col = 1:4, lty = 1:4, lwd = 3, bty = "n"
#' )
#'
#' @references
#' \itemize{
#' \item \cite{H.-G. Müller, J.L. Wang and W.B. Capra (1997). "From lifetables to hazard rates: The transformation approach." Biometrika 84, 881--892.}
#' \item \cite{S.J. Sheather and M.C. Jones (1991). "A reliable data-based bandwidth selection method for kernel density estimation." JRSS-B 53, 683--690.}
#' \item \cite{H.-G. Müller, U. Stadtmüller, and T. Schmitt. (1987) "Bandwidth choice and confidence intervals for derivatives of noisy data." Biometrika 74, 743--749.}
#' }
#' @export

CreateDensity <-
  function(y = NULL,
           histogram = NULL,
           freq = NULL,
           bin = NULL,
           optns = list()) {
    if (is.null(y) == TRUE) {
      if (is.null(histogram) == FALSE) {
        if (is.null(histogram$breaks)) {
          mids <- histogram$mids
          bin <- mids[1] - (mids[2] - mids[1]) / 2
          for (i in 2:length(mids)) {
            bin[i] <- (mids[i - 1] + mids[i]) / 2
          }
          bin[length(mids) + 1] <-
            mids[length(mids)] + (mids[length(mids)] - mids[length(mids) - 1]) / 2
        } else {
          bin <- histogram$breaks
        }

        freq <- histogram$counts
      }

      if (is.null(freq) == TRUE) {
        if ((length(freq) + 1) != length(bin)) {
          stop("length(bin) should equal to length(freq)+1.")
        }

        # mids <- c()
        # for (i in 1:length(freq)) {
        #   mids[i] <- (bin[i]+bin[i+1])/2
        # }
      }

      y <- c()
      for (i in 1:length(freq)) {
        if (freq[i] != 0) {
          yTmp <- seq(bin[i], bin[i + 1], length.out = (freq[i] + 2))
          y <- c(y, yTmp[-c(1, length(yTmp))])
        }
      }
    }

    if (is.null(optns$kernel)) {
      kernel <- "gauss"
    } else {
      kernel <- optns$kernel
    }

    if (is.null(optns$delta)) {
      delta <- diff(range(y)) / 1000
      # delta = max(c( diff(range(y))/1000, min(diff(sort(unique(y)))) ))
    } else {
      delta <- optns$delta
    }

    if (is.null(optns$nRegGrid)) {
      nRegGrid <- 101
    } else {
      nRegGrid <- optns$nRegGrid
    }

    if (is.null(optns$outputGrid)) {
      outputGrid <- NULL
    } else {
      outputGrid <- optns$outputGrid
    }

    if (is.null(optns$infSupport)) {
      infSupport <- FALSE
    } else {
      infSupport <- optns$infSupport
    }

    N <- length(y)
    # histgrid = seq( min(y)-delta*0.5, max(y)+delta*0.5, by = delta );
    histgrid <- c(seq(min(y), max(y), by = delta) - delta * 0.5, max(y) + delta *
      0.5)
    if ((max(y) + delta * 0.5) > histgrid[length(histgrid)]) {
      histgrid[length(histgrid)] <- max(y) + delta * 0.5
    }
    M <- length(histgrid)
    histObj <- hist(y, breaks = histgrid, plot = FALSE)

    yin <- histObj$density
    xin <- histObj$mids
    # yin = histObj$counts[1:M-1] / N / delta;
    # xin = seq( min(y), max(y), by = delta);

    if (is.null(optns$userBwMu)) {
      densTmp <- density(y, kernel = "epanechnikov")
      bw <- 2 * densTmp$bw
      # bw <- 2*densTmp$bw

      # bwCV = fdapace:::CVLwls1D(y = yin, t = xin, kernel = kernel, npoly = 1, nder = 0, dataType = 'Dense', kFolds = 5)
      # bw <- bwCV
    } else {
      bw <- optns$userBwMu
    }

    densObj <- list()
    densObj$bw <- bw
    densObj$x <- outputGrid
    if (infSupport) {
      if (is.null(outputGrid)) {
        densObj$x <- seq(min(y) - delta, max(y) + delta, length.out = nRegGrid)
      }
      qpadding <- 10
      mu <- fdapace::Lwls1D(
        bw = bw,
        kernel_type = kernel,
        win = rep(1, M + (-1 + 2 * qpadding)),
        xin = c(min(y) - 11:2 * delta, xin, max(y) + 2:11 * delta),
        yin = c(rep(0, qpadding), yin, rep(0, qpadding)),
        xout = densObj$x
      )
    } else {
      if (is.null(outputGrid)) {
        densObj$x <-
          outputGrid <- seq(min(y), max(y), length.out = nRegGrid)
      }
      if (min(xin) > outputGrid[1]) {
        qpad1 <- seq(outputGrid[1], min(xin), by = delta)
      } else {
        qpad1 <- numeric()
      }
      if (max(xin) < outputGrid[length(outputGrid)]) {
        qpad2 <- seq(outputGrid[length(outputGrid)], max(xin), by = -delta)
        qpad2 <- qpad2[length(qpad2):1]
      } else {
        qpad2 <- numeric()
      }
      xinNew <- c(qpad1, xin, qpad2)
      yinNew <- c(rep(0, length(qpad1)), yin, rep(0, length(qpad2)))

      # note that Lwls1D requires xin to be at least of length 6
      mu <- fdapace::Lwls1D(
        bw = bw,
        kernel_type = kernel,
        win = rep(1, length(xinNew)),
        xin = xinNew,
        yin = yinNew,
        xout = densObj$x
      )
    }
    mu[mu < 0] <- 0
    # densObj$y = mu / fdapace:::trapzRcpp(densObj$x, mu)
    densObj$y <- mu / pracma::trapz(densObj$x, mu)

    return(densObj)
  }
