

# Load libraries ---------------------------

library(scmamp)
library(ggplot2)



# Configuration ---------------------------

# Input file and the field separator used when reading it.
data.file <- "results.dat"
delimiter <- ","

# Base directory under which plot files are written.
plot.dir <- "./"

# Alpha passed to the post-hoc tests and to the ranking plot.
significance_level <- 0.05

# Columns holding the algorithm results, the columns used for grouping
# instances, and the columns passed as `ignore` when summarizing.
alg.columns <- c("TS", "SA", "CMSA", "LNS")
instance.descriptors <- "size"
to.ignore <- "inst"

# Function used to aggregate results.
average.function <- mean


#' Load data
#'

# Read the raw results table; the first row holds the column names.
data <- read.csv(file = data.file, header = TRUE, sep = delimiter)

#' Create a summarization table with the average results per instance
summary.data <- summarizeData(
  data = data,
  fun = average.function,
  group.by = instance.descriptors,
  ignore = to.ignore
)

#################
#' OMNIBUS TEST #
#################

# Omnibus tests over the algorithm result columns: Friedman first, then
# Iman-Davenport.
friedmanTest(data = data[, alg.columns])
imanDavenportTest(data = data[, alg.columns])

# Multiple comparison test on the same columns, using the "iman" test
multipleComparisonTest(data = data[, alg.columns], test = "iman")


########################
#' POST-HOC COMPARISON #
########################


# Perform Nemenyi test. The configured significance level is passed
# explicitly so this test stays in step with the rest of the script instead
# of relying on the function's default alpha.
nemenyi_test_result <- nemenyiTest(data[, alg.columns],
                                   alpha = significance_level)
nemenyi_test_result
nemenyi_test_result$diff.matrix

# Choice of correction for all-pairwise comparisons:
#   Comparing <= 9 algorithms: Bergmann and Hommel, but it is a complex and
#                              computationally expensive method.
#   Comparing  > 9 algorithms: Shaffer's static method.

# Perform post-hoc test with Bergmann correction:
bergmann_posthoc_results <- postHocTest(data = data[, alg.columns],
                                        test = "friedman",
                                        correct = "bergmann")



#' Now, two-fold post-hoc comparison. For the table, the best in each instance vs. the rest
#'
#' `alpha` is the significance level handed to the test; change `significance_level` to use a different one.

all.vs.best.results <- postHocTest(
  data = data,
  algorithms = alg.columns,
  group.by = instance.descriptors,
  test = "friedman",
  control = "max",
  use.rank = FALSE,
  sum.fun = average.function,
  correct = "finner",
  alpha = significance_level
)



#' Now, all vs. all for the plots.
#'

# `decreasing` controls the direction of the ranking used by the test.
# NOTE(review): the original note here said to set decreasing=TRUE when the
# smallest results are the best; in scmamp's rankMatrix, decreasing=TRUE
# appears to give rank 1 to the largest value instead -- TODO confirm against
# the package documentation and against the control = "max" choice used in
# the all-vs-best comparison.
all.vs.all <- postHocTest(
  data = data,
  algorithms = alg.columns,
  test = "friedman",
  control = NULL,
  use.rank = TRUE,
  sum.fun = average.function,
  correct = "finner",
  alpha = significance_level,
  decreasing = FALSE
)

#' Same but grouped
#'

# Group by the configured instance descriptors rather than a hard-coded
# column name, so this call follows the configuration like the others.
all.vs.all.grouped <- postHocTest(
  data = data,
  algorithms = alg.columns,
  test = "friedman",
  group.by = instance.descriptors,
  control = NULL,
  use.rank = TRUE,
  sum.fun = average.function,
  correct = "finner",
  alpha = significance_level,
  decreasing = FALSE
)

#' ## Plots

#' Create a plot similar to Demsar's critical difference plot.

# pdf() cannot open its output file when the target directory is missing, so
# make sure the "plots" directory under plot.dir exists before plotting.
dir.create(paste0(plot.dir, "plots"), showWarnings = FALSE, recursive = TRUE)

pdf(file = paste0(plot.dir, "plots/CD_plot_all.pdf"), width = 10, height = 2.2)
plotRanking(all.vs.all$corrected.pval, summary = all.vs.all$summary,
            alpha = significance_level)
dev.off()



#' ## References
#' Demšar, J. (2006) Statistical Comparisons of Classifiers over Multiple Data Sets. _Journal of Machine Learning Research_, 7, 1-30. 

