# Load the benchmark results. The file is semicolon-separated with a header
# row; `as.is = TRUE` keeps text columns as character vectors instead of
# converting them to factors. Logical arguments are spelled out as TRUE
# because `T` is an ordinary variable that can be reassigned.
m <- read.table(
  "../tables/statistical-data.csv",
  sep = ";", as.is = TRUE, header = TRUE
)
table(m$graph)

# For convenient visualization every quality measure should be oriented so
# that higher is better, ideally within [0, 1].

# Force the measure columns to numeric; entries that cannot be parsed as
# numbers become NA (with a coercion warning).
for (measure_col in c("success", "mAP", "meanrank", "stretch")) {
  m[[measure_col]] <- as.numeric(m[[measure_col]])
}

# Mean rank and stretch are inverted so that larger values are better:
# invmr = 1 / meanrank, invstr = 1 / stretch.
# NOTE(review): the results land in (0, 1] only if the raw values are >= 1;
# this is assumed here, not checked.
m$invmr <- 1 / m$meanrank
m$invstr <- 1 / m$stretch

# Rows that belong to the simulated graphs.
sim <- subset(m, graph == "simulated")

table(sim$method)

# a different ranking with centiles

library(tidyverse)

# Rank the rows of each graph by one measure and convert the ranks to
# empirical-CDF values.
#
# Arguments:
#   df          data frame that the new rows are appended to (callers pass an
#               empty data.frame() to start a fresh ranking).
#   df_orig     data frame with columns `name`, `graph` and the column named
#               by `measure`.
#   graph_list  graph identifiers to process; each is matched against
#               `df_orig$graph`.
#   measure     name (string) of the column to rank by. Ranks are ascending,
#               so the largest measure value gets the highest rank.
#
# Returns `df` with one appended row per row of every processed graph and the
# columns `name`, `graph`, `rank` and `ecdf_r`. `ecdf_r` is the fraction of
# the graph's non-missing ranks that are <= the row's rank. Rows whose measure
# is NA keep NA in both `rank` and `ecdf_r`.
#
# Graphs with no rows in `df_orig`, or with no non-missing measure value, are
# skipped with a warning; ecdf() cannot be built for them and would otherwise
# abort the whole run.
prepare_ranking_ecdf <- function(df, df_orig, graph_list, measure) {
  if (!(measure %in% names(df_orig))) {
    stop(
      "prepare_ranking_ecdf: unknown measure column '", measure, "'",
      call. = FALSE
    )
  }

  # Collect the per-graph tables in a list and bind them once at the end
  # instead of growing `df` with rbind() on every iteration.
  pieces <- vector("list", length(graph_list))
  k <- 0
  for (graph1 in graph_list) {
    k <- k + 1
    in_graph <- !is.na(df_orig$graph) & df_orig$graph == graph1
    a <- df_orig[in_graph, , drop = FALSE]

    ranks <- rank(a[[measure]], na.last = "keep")
    if (all(is.na(ranks))) {
      warning(
        "prepare_ranking_ecdf: no usable '", measure, "' values for graph '",
        graph1, "'; skipping it",
        call. = FALSE
      )
      next
    }

    # ecdf() ignores NA when it is built and returns NA when evaluated at NA,
    # so the whole rank vector can be evaluated in one call.
    pieces[[k]] <- data.frame(
      name = a$name,
      graph = a$graph,
      rank = ranks,
      ecdf_r = ecdf(ranks)(ranks),
      stringsAsFactors = FALSE
    )
  }

  pieces <- Filter(Negate(is.null), pieces)
  do.call(rbind, c(list(df), pieces))
}

# Draw a method-by-decile heat map of ECDF ranks and write it to a PDF file.
#
# Arguments:
#   filename  path of the PDF file to create.
#   df        data frame with the columns `name` (method) and `ecdf_r`
#             (ECDF value of the rank, in [0, 1]), e.g. the result of
#             prepare_ranking_ecdf().
#   title     text used as the y-axis label.
#
# Every row of `df` falls into one of ten bins according to `ecdf_r`
# ([0, 0.1), [0.1, 0.2), ..., [0.9, 1]). Per method, the bin counts are
# divided by the method's total, and these fractions are drawn as tiles
# (white = 0, darker green = larger fraction).
#
# Rows with a missing `ecdf_r` are skipped with a warning; rows with a missing
# `name` are not counted. An empty `df` is rejected with an error.
#
# Returns the value of dev.off(). Progress messages are printed along the way.
draw_centile_rankings <- function(filename, df, title) {
  bin_names <- c(
    "p0_10", "p10_20", "p20_30", "p30_40", "p40_50",
    "p50_60", "p60_70", "p70_80", "p80_90", "p90_100"
  )

  if (is.null(df$name) || is.null(df$ecdf_r) || nrow(df) == 0) {
    stop(
      "draw_centile_rankings: `df` has no ranking rows to plot",
      call. = FALSE
    )
  }

  methods <- unique(df$name)

  print("Prepared placeholder_heat")

  # Decile index 1..10 for every row; ecdf_r == 1 would give 11 and is folded
  # into the last bin.
  bin <- pmin(floor(10 * df$ecdf_r + 1), 10)
  method_idx <- match(df$name, methods)

  if (anyNA(bin)) {
    warning(
      "draw_centile_rankings: skipping ", sum(is.na(bin)),
      " row(s) with missing ecdf_r",
      call. = FALSE
    )
  }
  usable <- !is.na(df$name) & !is.na(bin)

  # Count all (method, bin) pairs at once; fixing the factor levels keeps
  # empty methods and empty bins in the table as zeros.
  tallied <- table(
    factor(method_idx[usable], levels = seq_along(methods)),
    factor(bin[usable], levels = seq_along(bin_names))
  )
  counts <- matrix(
    as.numeric(tallied),
    nrow = length(methods),
    dimnames = list(NULL, bin_names)
  )

  print("Computed placeholder_heat")

  # Row-wise normalization: each method's counts become fractions.
  placeholder_heat_freq <- as.data.frame(counts / rowSums(counts))
  placeholder_heat_freq$method <- methods

  print("Computed placeholder_heat_freq")

  library(data.table)
  long <- melt(
    setDT(placeholder_heat_freq),
    id.vars = c("method"), variable.name = "perc"
  )
  print("Reshaped to long")

  pdf(filename)
  # Make sure the device is closed even if building or printing the plot
  # fails; on success it is closed explicitly below so that the function
  # still returns the value of dev.off().
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  g <- ggplot(long, aes(perc, method, fill = value)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "forestgreen") +
    coord_equal() + xlab("percentile of ranks") + ylab(title) +
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme_minimal(base_size = 15) +
    theme(legend.direction = "horizontal", legend.position = "none")
  print(g)

  device_open <- FALSE
  dev.off()
}

# ---- Real-world graphs: selection and method table ----

# Graph groups used by the ranking plots.
hierarchies <- c("noun", "verbf", "acm", "mesh", "tetrapoda", "csphd")

graphs_full <- c(
  "acm", "csphd", "mesh", "noun", "tetrapoda", "verbf",
  "astroph", "condmat", "grqc", "hepph", "diseasome", "facebook",
  "followers-2009", "yeast", "openflights", "ias"
)

graphs_conn <- c(
  "connectome/Cat1", "connectome/Cat2", "connectome/Cat3",
  "connectome/CElegans", "connectome/Drosophila1", "connectome/Drosophila2",
  "connectome/Human1", "connectome/Human2", "connectome/Human6",
  "connectome/Human7", "connectome/Human8",
  "connectome/Macaque1", "connectome/Macaque2",
  "connectome/Macaque3", "connectome/Macaque4",
  "connectome/Mouse2", "connectome/Mouse3",
  "connectome/Rat1", "connectome/Rat2", "connectome/Rat3",
  "connectome/ZebraFinch2"
)

graphs_rankings <- c(graphs_full, graphs_conn)

# Table with real graphs: drop the synthetic ones, keep only rows with
# sid == 0, remove exact duplicate rows, then drop "mammal".
q <- subset(m, graph != "simulated" & graph != "sim3" & sid == 0)
q <- unique(q)
q <- subset(q, graph != "mammal")

# Fix the graph order (general graphs first, then connectomes). Any graph
# that is not listed becomes NA.
q$graph <- factor(q$graph, levels = graphs_rankings)

# Exclude experimental unpublished embedders. The discrete "+DHRG" variants
# (HypCLOVE+DHRG, Lorentz2D+DHRG, Poincare2D+DHRG) are currently kept.
q <- subset(q, name != "X" & name != "RogueViz" & name != "Penalty")

# Exclude more than 2D and Euclidean.
q <- q[which(q$dim <= 2), , drop = FALSE]

# One row per method: name, numeric code (read into `shapes` below) and
# color (read into `colors` below). Mixing strings and numbers in c() stores
# every cell as a string. "Penalty" (code 7) is intentionally left out.
tab <- matrix(
  c(
    "BFKL", 0, "black",
    "BFKL+DHRG", 12, "black",
    "Lorentz2D", 5, "black",
    "Lorentz2D+DHRG", 14, "black",
    "Poincare2D", 2, "black",
    "Poincare2D+DHRG", 14, "black",
    "Mercator fast", 1, "black",
    "Mercator full", 13, "black",
    "Anneal2D", 6, "black",
    "LTiling", 3, "black",
    "TreeRep", 4, "darkred",
    "Coalescent2", 10, "black",
    "KVK", 11, "black",
    "HypCLOVE", 8, "black",
    "HypCLOVE+DHRG", 14, "black",
    "LPCS", 14, "black"
  ),
  ncol = 3, byrow = TRUE
)

# Method order follows the table; methods missing from it become NA.
q$name <- factor(q$name, levels = tab[, 1])

shapes <- as.numeric(tab[, 2])
colors <- tab[, 3]

# ---- Heat maps from rankings (real-world graphs) ----
# Each plot starts from an empty data frame, ranks the methods per graph by
# one measure, and writes one PDF.

ranking <- prepare_ranking_ecdf(data.frame(), q, graphs_rankings, "mAP")
draw_centile_rankings("real-world_aggregate_map.pdf", ranking, "MAP")

ranking <- prepare_ranking_ecdf(data.frame(), q, graphs_rankings, "invmr")
draw_centile_rankings("real-world_aggregate_mr.pdf", ranking, "MR")

# The remaining measures are not valid for hierarchies, so they are ranked
# on the non-hierarchy graphs only.
graphs_nonhierarchies <- c(
  "astroph", "condmat", "grqc", "hepph", "diseasome", "facebook",
  "followers-2009", "yeast", "ias", "openflights",
  graphs_conn
)

aux <- subset(q, !(graph %in% hierarchies))

ranking <- prepare_ranking_ecdf(
  data.frame(), aux, graphs_nonhierarchies, "success"
)
draw_centile_rankings("real-world_aggregate_success.pdf", ranking, "GSR")

ranking <- prepare_ranking_ecdf(
  data.frame(), aux, graphs_nonhierarchies, "invstr"
)
draw_centile_rankings("real-world_aggregate_stretch.pdf", ranking, "GSF")

ranking <- prepare_ranking_ecdf(
  data.frame(), aux, graphs_nonhierarchies, "effect"
)
draw_centile_rankings("real-world_aggregate_gre.pdf", ranking, "GRE")

# For the "control" measure, TreeRep and LTiling are left out entirely, and
# LPCS is left out on connectome/Macaque4 only.
aux <- subset(
  aux,
  name != "TreeRep" &
    name != "LTiling" &
    (name != "LPCS" | graph != "connectome/Macaque4")
)

ranking <- prepare_ranking_ecdf(
  data.frame(), aux, graphs_nonhierarchies, "control"
)
draw_centile_rankings("real-world_aggregate_control.pdf", ranking, "ICV")

# ---- Plots for synthetic networks ----

# Drop the experimental embedder "X" and keep embeddings of dimension <= 2.
# The dimension filter runs BEFORE the list of graphs is collected: a graph
# whose only rows are higher-dimensional would otherwise stay in `sim_graphs`
# with no data left, and ranking it fails inside ecdf().
sim <- subset(sim, name != "X")
sim <- subset(sim, sim$dim <= 2)

# Every combination of graph, ssize, stemp and sid is treated as a separate
# graph; the combined label replaces the `graph` column so that the ranking
# is computed per combination.
sim$graph_name <- paste(sim$graph, sim$ssize, sim$stemp, sim$sid)
sim$graph <- sim$graph_name

sim_graphs <- unique(sim$graph_name)

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "mAP")
draw_centile_rankings("simulated_aggregate_map.pdf", ranking, "MAP")

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "invmr")
draw_centile_rankings("simulated_aggregate_mr.pdf", ranking, "MR")

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "invstr")
draw_centile_rankings("simulated_aggregate_stretch.pdf", ranking, "GSF")

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "success")
draw_centile_rankings("simulated_aggregate_success.pdf", ranking, "GSR")

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "control")
draw_centile_rankings("simulated_aggregate_control.pdf", ranking, "ICV")

ranking <- data.frame()
ranking <- prepare_ranking_ecdf(ranking, sim, sim_graphs, "effect")
draw_centile_rankings("simulated_aggregate_gre.pdf", ranking, "GRE")
