### fit vcbod to clean (non-contaminated) data
library(doParallel)
library(foreach)
library(parallel)
#' vcbod_fit
#'
#' Fit the outlier detector on uncontaminated data.
#'
#' The pair-copula family set and the parameter estimation method are picked
#' from the number of rows of `indata`: with fewer than 50 rows the families
#' "gaussian", "clayton", "gumbel" and "frank" are used together with "itau",
#' otherwise "tll" is used together with "mle".
#'
#' @param indata The training data
#' @param dependence Bool to indicate whether dependence shall be analysed
#' @param discr_thresh value between 0 and 1 indicating when a value is
#' treated as discrete
#' @param calc_fit Bool indicating if outlier scores of training data
#' are to be calculated
#' @param n_eval Number of grid-points, the higher, the more accurate, but slower
#' @param discount discount factor for copulas in higher trees
#' @param trunclvl truncation level for the vinecopula
#' @param ncores number of cores for parallelisation
#' @param verbose Bool indicating if stages during fitting shall be displayed
#'
#' @return A list with the elements
#' \describe{
#'   \item{sample_scores}{final outlier scores of the training data, or `NULL`
#'   if `calc_fit` is `FALSE`}
#'   \item{fit_scores}{the complete result of the training-score calculation,
#'   or `NULL` if `calc_fit` is `FALSE`}
#'   \item{dep_vc}{dependence factor: the sum of the `signif` entries of all
#'   fitted copulas, `0` if there are none}
#'   \item{misc}{the fitted individual outlier functions}
#' }
#' @export
#'
#' @examples
#' # fit vcbod to example data
#' vcbod_f <- vcbod_fit(make_test_data(), dependence = TRUE, trunclvl = 2)
vcbod_fit <- function(indata,
                  dependence = TRUE,
                  discr_thresh = 0.1,
                  calc_fit = TRUE,
                  n_eval = 100,
                  discount = 0.9,
                  trunclvl = 3,
                  ncores = 1,
                  verbose = TRUE) {

  # check for correct input
  check_args(indata = indata,
             trunclvl = trunclvl)

  # pair copulas and parmethod in dependence of sample size
  if (nrow(indata) < 50) {
    famset <- c("gaussian", "clayton", "gumbel", "frank")
    parmethod <- "itau"
  } else {
    famset <- "tll"
    parmethod <- "mle"
  }

  # calculate individual outlier functions
  outl <- outlscore_maker_fit(indata = indata,
                              dependence = dependence,
                              calc_fit = calc_fit,
                              discr_thresh = discr_thresh,
                              n_eval = n_eval,
                              discount = discount,
                              trunclvl = trunclvl,
                              famset = famset,
                              parmethod = parmethod,
                              ncores = ncores,
                              verbose = verbose)

  # calculate dependence factor
  # lapply + unlist always hands sum() an atomic vector; sapply would return
  # a list (and make sum() fail) for an empty copula list or for an element
  # without a `signif` entry
  cops <- outl[["cops"]]
  if (length(cops) > 0) {
    dep_vc <- sum(unlist(lapply(cops, function(cop) cop[["signif"]])))
  } else {
    dep_vc <- 0
  }

  # if required, calculate sample scores and fit info
  sample_scores <- NULL
  fit_scores <- NULL
  if (calc_fit) {
    fit_scores <- sample_scores_fit(outl = outl,
                                    dep_vc = dep_vc)
    sample_scores <- fit_scores[["final_scores"]]
  }

  # list() keeps NULL-valued elements, so all four names are always present
  list("sample_scores" = sample_scores,
       "fit_scores" = fit_scores,
       "dep_vc" = dep_vc,
       "misc" = outl)
}

### vcbod to be applied to test data
#' vcbod
#'
#' Outlier detector for fresh unseen data
#'
#' Scores `new_data` with a fitted detector and labels every observation whose
#' score lies strictly above a threshold. The threshold is the `prob`-quantile
#' of the training sample scores stored in `vcb_fit`; if `vcb_fit` holds no
#' sample scores, the quantile of the new scores themselves is used instead.
#'
#' @param new_data new data to test
#' @param vcb_fit trained vcbod = output from \link{vcbod_fit}
#' @param prob quantile for threshold, a single probability
#' @param ncores number of cores for parallelisation
#' @param verbose Bool indicating if stages during scoring shall be displayed
#'
#' @return A list with the elements `scores` (outlier scores), `labels`
#' (1 if the score exceeds the threshold, 0 otherwise), `thresh` (the
#' threshold) and `scores_info` (additional information on the scores)
#' @export
#'
#' @examples
#' # generate training data:
#' data_train <- make_test_data(n_draws = 1000)
#' # fit vcbod to training data:
#' vcbod_f <- vcbod_fit(data_train, ncores = 4)
#' # c(-2, 0, -2) is not in distribution, c(2, 0, -2) is in distribution
#' # run vcbod on new points:
#' vcbod(rbind(c(-2, 0, -2), c(2, 0, -2)), vcb_fit = vcbod_f)
#'
#' # second example with discrete data, no dependence
#' data_train <- cbind(data_train, c(rep(1, 700), rep(2, 200), rep(3, 100)))
#' vcbod_f <- vcbod_fit(data_train, ncores = 4)
#' vcbod(rbind(c(-2, 0, -2, 0), c(2, 0, -2, 2)), vcb_fit = vcbod_f)
#'
#' # third example, fourth feature mixed, no dependence with rest
#' data_train[, 4] <- c(rep(10, 700), rnorm(300))
#' vcbod_f <- vcbod_fit(data_train, ncores = 4)
#' vcbod(rbind(c(-2, 0, -2, -2), c(2, 0, -2, 10)), vcb_fit = vcbod_f)
#'
vcbod <- function(new_data,
                  vcb_fit,
                  prob = 0.99,
                  ncores = 1,
                  verbose = TRUE) {

  # a vector of probabilities would give several thresholds and the
  # comparison below would silently recycle them over the scores
  if (length(prob) != 1L) {
    stop("`prob` must be a single probability", call. = FALSE)
  }

  # check for correct input
  new_data <- check_new_data(new_data = new_data,
                             vcb_fit = vcb_fit)

  # calculate marginal and dependence outlier scores
  outl <- outlscore_maker(new_data,
                          ncores = ncores,
                          vcb_fit = vcb_fit,
                          verbose = verbose)

  # calculate sample scores and addition info
  score_res <- sample_scores(outl = outl,
                             fit_scores = vcb_fit[["fit_scores"]])

  scores_info <- score_res[["scores_info"]]
  scores <- score_res[["final_scores"]]

  # threshold from the training scores; fall back to the new scores when the
  # fit carries no training scores
  ref_scores <- vcb_fit[["sample_scores"]]
  if (length(ref_scores) == 0) {
    ref_scores <- scores
  }
  thresh <- stats::quantile(ref_scores, probs = prob)

  # calculate labels based on the threshold
  # vectorised comparison: numeric(0) for no scores, NA for an NA score;
  # names are reset to those of the scores, as the comparison could otherwise
  # pick up the name of the quantile
  new_labels <- as.numeric(scores > thresh)
  names(new_labels) <- names(scores)

  list("scores" = scores,
       "labels" = new_labels,
       "thresh" = thresh,
       "scores_info" = scores_info)
}


