### misc functions

check_args <- function(indata, trunclvl){
  if(!is.matrix(indata)){
    stop("please provide data as matrix")
  } else if(anyNA(indata)){
    stop("NAN's not supported")
  } else if(dim(indata)[2] < 2){
    stop("data should have at least two variables")
  } else if(dim(indata)[1] < 10){
    stop("data should have at least ten observations")
  }

  if(trunclvl < 1){ stop("trunclvl must be integer >= 1")}
}

check_new_data <- function(new_data, vcb_fit){
  if(is.vector(new_data)) {new_data <- matrix(new_data, nrow = 1)}
  if(!is.matrix(new_data)){
    stop("please provide data as matrix,
         or alternatively as vector for single observation")
  } else if(anyNA(new_data)){
    stop("NAns not supported")
  } else if(dim(new_data)[2] != length(vcb_fit[["misc"]][["margins"]])){
    stop("new data must be of same dim as training data")
  }

  return(new_data)
}

calc_indiv_scores <- function(cops, n_var, n_obs, c_vars){

  var_scores <- matrix(0, nrow = n_obs, ncol = n_var)
  max_scores <- rep(0, n_var)

  for(i in 1:length(cops)){
    if(cops[[i]][["signif"]] == 0){ next }
    for(j in 1:2){
      # find actual number of variables
      l <- c_vars[ cops[[i]][["lrarg"]][j] ]

      var_scores[ , l] <- var_scores[ , l] + cops[[i]][["sample_scores"]]
      max_scores[l] <- max_scores[l] + cops[[i]][["signif"]]
    }
  }

  return(list("var_scores" = var_scores,
              "max_scores" = max_scores))
}

paircop_scores <- function(cops, train_cops){

  pair_quants <- c()
  max_scores <- NULL
  col_names <- c()

  for(i in 1:length(cops)){

    if(cops[[i]][["signif"]] == 0){ next }

    # find correponding train_cop
    for(j in 1:length(train_cops)){
      if(sum(train_cops[[j]][["position"]] == cops[[i]][["position"]]) == 2){
        j_train <- j
        break
      }
    }

    pair_quants <- cbind(pair_quants,
                         ecdf(train_cops[[j_train]][["sample_scores"]])(cops[[i]][["sample_scores"]]))

    max_scores <- c(max_scores, cops[[i]][["signif"]])

    if(is.null(cops[[i]][["cond"]])){
      new_name <- as.character(paste(cops[[i]][["lrarg"]], collapse = ","))
    } else {
      new_name <- as.character(paste(paste(cops[[i]][["lrarg"]], collapse = ","),
                                      cops[[i]][["cond"]], sep = "|"))
    }

    col_names <- c(col_names, new_name)
  }

  colnames(pair_quants) <- col_names

  return(list("pair_quants" = pair_quants,
              "max_scores" = max_scores)
         )
}

# calculate final sample scores, as well as scores per variable
sample_scores_fit <- function(outl, dep_vc){

  fact <- 1

  # extract margin scores
  margin_scores <- sapply(outl[["margins"]], function(x) x[["sample_scores"]])

  # if no dependence, only regard marginal scores
  if(dep_vc == 0){
    final_scores <- apply(margin_scores, 1, sum)/length(outl[["margins"]])
    cop_scores <- NULL
    max_scores <- NULL
  } else {
    # find the variables which have dependence
    c_vars <- which(sapply(outl[["margins"]], function(x) !x[["d_flag"]]))

    # calculate dependence score per observation per variable
    cop_scores <- calc_indiv_scores(cops = outl[["cops"]],
                                    n_var = length(outl[["margins"]]),
                                    n_obs = length(outl[["margins"]][[1]][["sample_scores"]]),
                                    c_vars = c_vars)

    max_scores <- fact*cop_scores[["max_scores"]]
    cop_scores <- fact*cop_scores[["var_scores"]]

    # add margins and dependence scores for each variable
    sample_scores <- margin_scores + cop_scores

    # standardize each variable
    sample_scores <- sweep(sample_scores, 2, (max_scores + 1)^-1, '*')

    # add up variables and standardize sum
    final_scores <- rowSums(sample_scores)/dim(sample_scores)[2]
  }

  # final score is vector, margin_score and cop_score matrix, columns are variables
  return(list("final_scores" = final_scores,
              "margin_scores" = margin_scores,
              "cop_scores" = cop_scores,
              "cops_fit" = outl[["cops"]]))
}


# calculate final sample scores
# fti_scores is output of sample_scores_fit run in vcbod_fit
sample_scores <- function(outl, fit_scores){

  fact <- 1
  ### calculate new scores

  # extract margin scores
  margin_scores <- sapply(outl[["margins"]],
                          function(x) x[["sample_scores"]])

  if(is.vector(margin_scores)){ margin_scores <- t(margin_scores) }

  m_scores <- apply(margin_scores, 1, sum)/length(outl[["margins"]])

  # if no dependence, only regard marginal scores
  if( is.null(outl[["cops"]]) ){
    cop_scores <- NULL

    final_scores <- m_scores

  } else {
    # find the variables which have dependence
    c_vars <- which(sapply(outl[["margins"]], function(x) !x[["d_flag"]]))

    # calculate dependence score per observation per variable
    cop_scores <- calc_indiv_scores(cops = outl[["cops"]],
                                    n_var = length(outl[["margins"]]),
                                    n_obs = length(outl[["margins"]][[1]][["sample_scores"]]),
                                    c_vars = c_vars)

    max_scores <- fact*cop_scores[["max_scores"]]
    cop_scores <- fact*cop_scores[["var_scores"]]

    # add margins and dependence scores for each variable
    sample_scores <- margin_scores + cop_scores

    # standardize each variable
    sample_scores <- sweep(sample_scores, 2, (max_scores + 1)^-1, '*')

    # add up variables and standardize sum
    final_scores <- rowSums(sample_scores)/dim(sample_scores)[2]
  }

  ##############################################################################
  # if fit_scores, calculate outlier statistics
  scores_info <- NULL
  if(!is.null(fit_scores)){
    # calculate quantiles for marginal and total score
    m_quants <- ecdf(apply(fit_scores[["margin_scores"]], 1, mean))(m_scores)

    if( is.null(outl[["cops"]]) ){
      # overall margin quantiles = tot_quants
      tot_quants <- NULL

      # calculate quantiles of each variable score of new points
      # matrix with as many rows as new observations
      var_marg_quants <- matrix(0,
                                nrow = dim(margin_scores)[1],
                                ncol = dim(margin_scores)[2])

      for(i in 1:dim(margin_scores)[2]){
        var_marg_quants[, i] <- ecdf(fit_scores[["margin_scores"]][, i])(margin_scores[, i])
      }

      # set dependence info to NULL
      var_tot_quants <- NULL
      var_dep_scores <- NULL
      pair_dep_scores <- NULL

    } else {
      tot_quants <- ecdf(fit_scores[["final_scores"]])(final_scores)

      # reconstruct variable scores of train data
      indiv_train_scores <- fit_scores[["margin_scores"]] + fit_scores[["cop_scores"]]
      indiv_train_scores <- sweep(indiv_train_scores, 2, (max_scores + 1)^-1, '*')

      # calculate quantiles of each variable score of new points
      # matrix with as many rows as new observations
      var_tot_quants <- matrix(0,
                           nrow = dim(margin_scores)[1],
                           ncol = dim(fit_scores[["margin_scores"]])[2])

      for(i in 1:dim(margin_scores)[2]){
        var_tot_quants[, i] <- ecdf(indiv_train_scores[, i])(sample_scores[, i])
      }

      # calculate quantiles of each variable score of new points
      # matrix with as many rows as new observations
      var_marg_quants <- matrix(0,
                                nrow = dim(margin_scores)[1],
                                ncol = dim(margin_scores)[2])

      for(i in 1:dim(margin_scores)[2]){
        var_marg_quants[, i] <- ecdf(fit_scores[["margin_scores"]][, i])(margin_scores[, i])
      }

      # matrix with as many rows as new observations
      var_dep_quants <- matrix(0,
                                nrow = dim(margin_scores)[1],
                                ncol = dim(margin_scores)[2])

      for(i in 1:dim(margin_scores)[2]){
        var_dep_quants[, i] <- ecdf(fit_scores[["cop_scores"]][, i])(cop_scores[, i])
      }

      pair_scores <- paircop_scores(cops = outl[["cops"]],
                                    train_cops = fit_scores[["cops_fit"]])


    } # end else of if(is.null(outl[["cops"]]))

  scores_info <- list("m_quants" = m_quants,
                      "tot_quants" = tot_quants,
                      "var_tot_quants" = var_tot_quants,
                      "var_marg_quants" = var_marg_quants,
                      "var_dep" = list("quants" = var_dep_quants,
                                       "maxes" = max_scores),
                      "pair_scores" = pair_scores
                      )

  } # end if(!is.null(fit_scores))

  # return final scores of new points and fit statistics
  return(list("final_scores" = final_scores,
              "scores_info" = scores_info))

}

