# Load the statistics table (semicolon-separated, with a header row).
# as.is = TRUE keeps text columns as character vectors.
m <- read.table("../tables//statistical-data.csv", sep = ";", as.is = TRUE,
                header = TRUE)
table(m$graph)

# Coerce the quality measures to numeric; values that cannot be parsed
# become NA (R emits a coercion warning in that case).
for (measure_col in c("success", "mAP", "meanrank", "stretch")) {
  m[[measure_col]] <- as.numeric(m[[measure_col]])
}

# For convenient visualisation every quality measure should read
# "the higher the better": mean rank and stretch are therefore inverted.
# NOTE(review): the inverses lie in (0, 1] only if meanrank and stretch
# are >= 1 -- confirm against the data.
m$invmr <- 1 / m$meanrank
m$invstr <- 1 / m$stretch

sim <- subset(m, graph == "simulated")

table(sim$method)

# BFKL has to be compared against Lorentz (discrete == 0).
# id_merge is the key used below to pair rows of the two methods: it
# concatenates ssize, sid and stemp.
sim$id_merge <- paste(sim$ssize, sim$sid, sim$stemp)


# this won't do much
# Overlay the densities of the raw running times of BFKL (blue) and
# 2-D Lorentz (black) for simulated graphs of size `n`.
#
# n     value of `ssize` selecting which simulated graphs to plot
# text  plot title
#
# Reads the global data frame `sim`.
# NOTE: this definition is replaced by the `dens_plo` defined right after
# it, so the script as written never calls this version.
dens_plo <- function (n, text) {
  lor <- subset(sim, discrete==0 & method == "lorentz" & dim==2)
  bfkl <- subset(sim, discrete==0 & method == "bfkl")
  # Select rows with explicit indexing rather than subset(): inside
  # subset() a bare `n` is looked up among the data frame's columns first,
  # and the regressions later in this script use `nbfkl`, which suggests
  # `sim` has a column named `n` that would shadow the argument.
  bfkl_times <- bfkl$time[which(bfkl$ssize == n)]
  lor_times <- lor$time[which(lor$ssize == n)]
  plot(density(bfkl_times), main=text, col="blue", xlab=NA)
  lines(density(lor_times))
}

# Plot the density of the paired running-time difference
# (Lorentz time minus BFKL time) for simulated graphs of size `n`.
#
# n     value of `ssizelor` selecting which paired runs to plot
# text  plot title
#
# Reads the global data frame `sim`; 2-D continuous Lorentz runs are paired
# with continuous BFKL runs through `id_merge`.
dens_plo <- function (n, text) {
  is_lor <- with(sim, discrete == 0 & method == "lorentz" & dim == 2)
  is_bfkl <- with(sim, discrete == 0 & method == "bfkl")
  paired <- merge(sim[which(is_lor), ], sim[which(is_bfkl), ],
                  by = "id_merge", suffixes = c("lor", "bfkl"))
  time_gap <- paired$timelor - paired$timebfkl
  plot(density(time_gap[which(paired$ssizelor == n)]),
       main = text, col = "blue", xlab = NA)
}

# One time-difference density plot per simulated graph size.
for (graph_size in c(100, 200, 500, 1000, 2000)) {
  dens_plo(graph_size, paste0("n=", graph_size))
}

# Continuous (discrete == 0) runs of the two methods being compared;
# Lorentz is restricted to dim == 2.
lor <- subset(sim, discrete == 0 & method == "lorentz" & dim == 2)
bfkl <- subset(sim, discrete == 0 & method == "bfkl")

# Attach the separately recorded BFKL timing (column `bfkltime` of the
# precise-times table) to the BFKL rows as `precise_time`, matching on the
# same ssize/sid/stemp key that id_merge uses in `sim`.
n <- read.table("../tables//precise-times.csv", sep = ";", as.is = TRUE,
                header = TRUE)
n$id_merge <- paste(n$ssize, n$sid, n$stemp)
n <- data.frame(id_merge = n$id_merge, precise_time = n$bfkltime)
bfkl <- merge(bfkl, n, by = "id_merge")

# One row per paired run. Every column other than id_merge that exists in
# both inputs gets the suffix "lor" or "bfkl" (e.g. timelor / timebfkl).
x <- merge(lor, bfkl, by = "id_merge", suffixes = c("lor", "bfkl"))

# Paired comparison columns, BFKL versus Lorentz.
# ratio_* is always BFKL / Lorentz; diff_* is Lorentz - BFKL, except for
# mean rank and stretch where it is log(BFKL) - log(Lorentz).

# running time: the ratio is expressed in percent
x$ratio <- with(x, 100 * (timebfkl / timelor))
x$diff <- with(x, timelor - timebfkl)
summary(x$ratio)

# mAP
x$ratio_map <- with(x, mAPbfkl / mAPlor)
x$diff_map <- with(x, mAPlor - mAPbfkl)
summary(x$ratio_map)

# mean rank
x$ratio_mr <- with(x, meanrankbfkl / meanranklor)
x$diff_mr <- with(x, log(meanrankbfkl) - log(meanranklor))

# greedy success
x$ratio_greedy <- with(x, successbfkl / successlor)
x$diff_greedy <- with(x, successlor - successbfkl)

# stretch
x$ratio_stretch <- with(x, stretchbfkl / stretchlor)
x$diff_stretch <- with(x, log(stretchbfkl) - log(stretchlor))

# control
x$diff_control <- with(x, controllor - controlbfkl)
x$ratio_control <- with(x, controlbfkl / controllor)
summary(x$ratio_control)


# Density of the BFKL/Lorentz time ratio (x$ratio), one plot per
# temperature code (stempbfkl 1 and 7), one curve per graph size.
# The curves for n=200 and n=100 are deliberately left out.
time_panels <- list(
  list(temp = 1, ylim = c(0, 8), legend_y = 6),
  list(temp = 7, ylim = c(0, 3.5), legend_y = 2.5)
)
for (panel in time_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  ratio_for <- function(size) at_temp$ratio[which(at_temp$ssizelor == size)]
  plot(density(ratio_for(2000)), main = "comparison of time [s]",
       col = "blue", xlab = NA, ylim = panel$ylim, xlim = c(0, 2))
  lines(density(ratio_for(1000)), col = "red")
  lines(density(ratio_for(500)), col = "purple")
  legend(1.25, panel$legend_y, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "red", "purple"), lty = 1)
}

# comparison of map in division by graph size
#
# x$ratio_map is the plain ratio mAPbfkl / mAPlor (not a percentage), so
# "both methods equal" is the vertical line at 1, and axis limits and
# legend positions have to be given on that scale.
# The curves for n=200 and n=100 are deliberately left out.

plot(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$ratio_map), main="comparison of map", col="blue", xlab=NA)
lines(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$ratio_map), col="red")
lines(density(subset(x, ssizelor == 500 & stempbfkl == 1)$ratio_map), col="purple")
abline(v=1)
# Keyword placement keeps the legend inside the plot whatever the data range.
legend("topright", legend=c("n=2000", "n=1000", "n=500"),
       col=c("blue", "red", "purple"), lty=1)

# Limits below are the former percent-scale limits (x: 50..130, y: 0..0.04)
# converted to the ratio scale: x divided by 100, density multiplied by 100.
plot(density(subset(x, ssizelor == 2000 & stempbfkl == 7)$ratio_map), main="comparison of map", col="blue", xlab=NA, ylim=c(0,4), xlim=c(0.5,1.3))
lines(density(subset(x, ssizelor == 1000 & stempbfkl == 7)$ratio_map), col="red")
lines(density(subset(x, ssizelor == 500 & stempbfkl == 7)$ratio_map), col="purple")
abline(v=1)
legend(1.05, 4, legend=c("n=2000", "n=1000", "n=500"),
       col=c("blue", "red", "purple"), lty=1)

# diff in map
#
# Three side-by-side panels (stempbfkl 1, 4, 7) of the density of
# x$diff_map (Lorentz mAP minus BFKL mAP), one curve per graph size,
# written to a 30x7 inch PDF. The vertical line at 0 marks equal mAP.
# The curves for n=200 and n=100 are deliberately left out.
pdf("~/densities_MAP.pdf", 30, 7)
par(mfrow = c(1, 3))
map_panels <- list(
  list(temp = 1, main = "comparison of MAP, T=0.1",
       xlim = c(-0.15, 0.15), ylim = NULL),
  list(temp = 4, main = "comparison of MAP, T=0.4",
       xlim = c(-0.15, 0.20), ylim = NULL),
  list(temp = 7, main = "comparison of MAP, T=0.7",
       xlim = c(-0.1, 0.2), ylim = c(0, 23))
)
for (panel in map_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  diff_for <- function(size) at_temp$diff_map[which(at_temp$ssizelor == size)]
  # ylim = NULL is plot()'s own default, i.e. the same as not passing it.
  plot(density(diff_for(2000)), main = panel$main, col = "blue",
       xlab = NA, ylab = NA, xlim = panel$xlim, ylim = panel$ylim,
       cex.main = 3.5, cex.axis = 2)
  lines(density(diff_for(1000)), col = "black")
  lines(density(diff_for(500)), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}

dev.off()


# individual plots
#
# Raw mAP densities of the two methods on the paired table x at
# stempbfkl == 1, one plot per graph size: Lorentz in blue, BFKL in black.
# No main= is passed to plot(), so the title is left to plot()'s default
# for density objects.

plot(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$mAPbfkl), col="black") 

plot(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$mAPbfkl), col="black")

plot(density(subset(x, ssizelor == 500 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 500 & stempbfkl == 1)$mAPbfkl), col="black") 

# The same plots for n=200 and n=100 are disabled:
#plot(density(subset(x, ssizelor == 200 & stempbfkl == 1)$mAPlor), col="blue")
#lines(density(subset(x, ssizelor == 200 & stempbfkl == 1)$mAPbfkl), col="black")

#plot(density(subset(x, ssizelor == 100 & stempbfkl == 1)$mAPlor), col="blue")
#lines(density(subset(x, ssizelor == 100 & stempbfkl == 1)$mAPbfkl), col="black")

# Cross-tabulate method against graph size on each side of the paired table.
table(x$methodbfkl, x$ssizebfkl)
table(x$methodlor, x$ssizelor)

# mAP density taken straight from the unpaired table m: 2-D continuous
# Lorentz runs with ssize == 100 and stemp == 1.
plot(density(subset(m, ssize == 100 & stemp == 1 & method=="lorentz" & dim==2 & discrete==0)$mAP), col="blue")

# comparison of diff in mr
#
# Three side-by-side panels (stempbfkl 1, 4, 7) of the density of
# x$diff_mr = log(BFKL mean rank) - log(Lorentz mean rank), one curve per
# graph size, written to a 30x7 inch PDF.
# The curves for n=200 and n=100 are deliberately left out.
pdf("~/densities_MR.pdf", 30, 7)
par(mfrow = c(1, 3))
mr_panels <- list(
  list(temp = 1, main = "comparison of -log(MR), T=0.1", ylim = c(0, 1.1)),
  list(temp = 4, main = "comparison of -log(MR), T=0.4", ylim = c(0, 2.0)),
  list(temp = 7, main = "comparison of -log(MR), T=0.7", ylim = c(0, 3.2))
)
for (panel in mr_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  diff_for <- function(size) at_temp$diff_mr[which(at_temp$ssizelor == size)]
  plot(density(diff_for(2000)), main = panel$main, col = "blue",
       xlab = NA, ylim = panel$ylim)
  lines(density(diff_for(1000)), col = "black")
  lines(density(diff_for(500)), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}
dev.off()


# comparison of diff in greedy
#
# Three side-by-side panels (stempbfkl 1, 4, 7) of the density of
# x$diff_greedy (Lorentz success minus BFKL success), one curve per graph
# size, written to a 30x7 inch PDF.
# The curves for n=200 and n=100 are deliberately left out.
pdf("~/densities_greedy.pdf", 30, 7)
par(mfrow = c(1, 3))
greedy_panels <- list(
  list(temp = 1, main = "comparison of greedy success, T=0.1",
       xlim = c(-0.25, 0.2)),
  list(temp = 4, main = "comparison of greedy success, T=0.4",
       xlim = c(-0.1, 0.2)),
  list(temp = 7, main = "comparison of greedy success, T=0.7",
       xlim = c(-0.1, 0.2))
)
for (panel in greedy_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  diff_for <- function(size) {
    at_temp$diff_greedy[which(at_temp$ssizelor == size)]
  }
  plot(density(diff_for(2000)), main = panel$main, col = "blue",
       xlab = NA, ylab = NA, xlim = panel$xlim, ylim = c(0, 13),
       cex.main = 3.5, cex.axis = 2)
  lines(density(diff_for(1000)), col = "black")
  lines(density(diff_for(500)), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}
dev.off()

# comparison of diff in stretch
#
# Three side-by-side panels (stempbfkl 1, 4, 7) of the density of
# x$diff_stretch = log(BFKL stretch) - log(Lorentz stretch), one curve per
# graph size, written to a 30x7 inch PDF.
# The curves for n=200 and n=100 are deliberately left out.
pdf("~/densities_stretch.pdf", 30, 7)
par(mfrow = c(1, 3))
stretch_panels <- list(
  list(temp = 1, main = "comparison of -log(stretch), T=0.1",
       ylim = c(0, 13), xlim = c(-0.15, 0.15)),
  list(temp = 4, main = "comparison of -log(stretch), T=0.4",
       ylim = c(0, 15), xlim = NULL),
  list(temp = 7, main = "comparison of -log(stretch), T=0.7",
       ylim = c(0, 15), xlim = NULL)
)
for (panel in stretch_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  diff_for <- function(size) {
    at_temp$diff_stretch[which(at_temp$ssizelor == size)]
  }
  # xlim = NULL is plot()'s own default, i.e. the same as not passing it.
  plot(density(diff_for(2000)), main = panel$main, col = "blue",
       xlab = NA, ylim = panel$ylim, xlim = panel$xlim)
  lines(density(diff_for(1000)), col = "black")
  lines(density(diff_for(500)), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}
dev.off()

# diffs in control
#
# Three side-by-side panels (stempbfkl 1, 4, 7) of the density of
# x$diff_control (Lorentz control minus BFKL control), one curve per graph
# size, written to a 30x7 inch PDF.
# The curves for n=200 and n=100 are deliberately left out.
pdf("~/densities_control.pdf", 30, 7)
par(mfrow = c(1, 3))
control_panels <- list(
  list(temp = 1, main = "comparison of control, T=0.1",
       xlim = c(-0.15, 0.15), ylim = NULL),
  list(temp = 4, main = "comparison of control, T=0.4",
       xlim = c(-0.15, 0.20), ylim = NULL),
  list(temp = 7, main = "comparison of control, T=0.7",
       xlim = c(-0.1, 0.2), ylim = c(0, 45))
)
for (panel in control_panels) {
  at_temp <- x[which(x$stempbfkl == panel$temp), ]
  diff_for <- function(size) {
    at_temp$diff_control[which(at_temp$ssizelor == size)]
  }
  # ylim = NULL is plot()'s own default, i.e. the same as not passing it.
  plot(density(diff_for(2000)), main = panel$main, col = "blue",
       xlab = NA, xlim = panel$xlim, ylim = panel$ylim)
  lines(density(diff_for(1000)), col = "black")
  lines(density(diff_for(500)), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}

dev.off()

###########

# regressions

set.seed(17052023)

# bfklwins_<measure> is a factor holding 1 where diff_<measure> < 0 and
# 0 otherwise (NA where the difference is NA).
for (measure in c("map", "mr", "greedy", "stretch", "control")) {
  x[[paste0("bfklwins_", measure)]] <-
    as.factor(ifelse(x[[paste0("diff_", measure)]] < 0, 1, 0))
}

# Counts of the mAP indicator per temperature code ...
table(x$bfklwins_map[which(x$stempbfkl == 1)])
table(x$bfklwins_map[which(x$stempbfkl == 4)])
table(x$bfklwins_map[which(x$stempbfkl == 7)])

# ... and of the mean-rank indicator.
table(x$bfklwins_mr[which(x$stempbfkl == 1)])
table(x$bfklwins_mr[which(x$stempbfkl == 4)])
table(x$bfklwins_mr[which(x$stempbfkl == 7)])

# The regressions only use graphs with ssizebfkl of at least 500.
xlog <- x[which(x$ssizebfkl >= 500), ]

library(caret)
library(lmtest)

# 20-fold cross-validation settings shared by the train() calls below.
train_control <- trainControl(method='cv', number=20)

# Cross-validated logistic regression of the mAP indicator on temperature
# plus the nbfkl and mbfkl columns ...
kfold_train <- train(bfklwins_map~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog,
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

# ... and on temperature plus graph size as a factor.
kfold_train <- train(bfklwins_map~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog,
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

# The same two specifications fitted on all of xlog. The nbfkl/mbfkl model
# gets its own name so that it is still available for the likelihood-ratio
# test after `reg` has been assigned.
reg_nm <- glm(bfklwins_map~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
summary(reg_nm)

reg <- glm(bfklwins_map~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# Likelihood-ratio test of dropping nbfkl and mbfkl. It has to start from
# the model that actually contains those terms; applied to `reg`, the
# update formula would remove nothing.
lrtest(reg_nm, . ~ . - nbfkl - mbfkl)

# Logistic regressions for the remaining indicators. Each measure gets a
# 20-fold cross-validated fit (train(), reusing the train_control object
# created for the mAP models) followed by a plain glm() on all of xlog.
# The variants with nbfkl + mbfkl as predictors are disabled for these
# measures. The train() calls stay in their original order because the
# cross-validation folds are drawn from the seeded random number stream.

# for mr
kfold_train <- train(
  bfklwins_mr ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, method = "glm", family = binomial, trControl = train_control
)
print(kfold_train)

reg <- glm(
  bfklwins_mr ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, family = binomial
)
summary(reg)

# for greedy
kfold_train <- train(
  bfklwins_greedy ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, method = "glm", family = binomial, trControl = train_control
)
print(kfold_train)

reg <- glm(
  bfklwins_greedy ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, family = binomial
)
summary(reg)

# for stretch
kfold_train <- train(
  bfklwins_stretch ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, method = "glm", family = binomial, trControl = train_control
)
print(kfold_train)

reg <- glm(
  bfklwins_stretch ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
  data = xlog, family = binomial
)
summary(reg)

# for control: the ratio of the two radius columns is an extra predictor
xlog$radiusratio <- xlog$radiusbfkl / xlog$radiuslor
kfold_train <- train(
  bfklwins_control ~ as.factor(stempbfkl) + as.factor(ssizebfkl) + radiusratio,
  data = xlog, method = "glm", family = binomial, trControl = train_control
)
print(kfold_train)

reg <- glm(
  bfklwins_control ~ as.factor(stempbfkl) + as.factor(ssizebfkl) + radiusratio,
  data = xlog, family = binomial
)
summary(reg)

# scatterplot for time vs quality

library(ggplot2)

# How many times longer Lorentz takes than BFKL, using the precise BFKL
# timing attached to the paired table.
xlog$timeloss <- (xlog$timelor/xlog$precise_time)

# Rows where the mAP indicator is 0, and the percentage by which the
# Lorentz mAP exceeds the BFKL mAP on those rows.
lorbet_map <- subset(xlog, bfklwins_map==0)
lorbet_map$pergain <- 100*((lorbet_map$mAPlor/lorbet_map$mAPbfkl)-1)

# One scatter plot (percentage gain vs. time loss) per temperature code,
# each followed by a Kendall correlation test on the same points.
# The ggplot objects are print()ed explicitly: a bare ggplot(...) line is
# only drawn through top-level auto-printing, which does not happen when
# the script is run with source(), leaving the PDF without a plot.

pdf("~/gain_MAP01.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==1), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.1", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==1)$pergain, subset(lorbet_map,stempbfkl==1)$timeloss, method="kendall")

pdf("~/gain_MAP04.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==4), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.4", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==4)$pergain, subset(lorbet_map,stempbfkl==4)$timeloss, method="kendall")

pdf("~/gain_MAP07.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==7), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.7", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==7)$pergain, subset(lorbet_map,stempbfkl==7)$timeloss, method="kendall")

# table with real graphs
#
# Keep the rows of m that do not belong to the "simulated" / "sim3" graphs
# and have sid == 0, drop duplicated rows, then drop the "mammal" graph.
q <- subset(m, graph != "simulated" & graph != "sim3" & sid == 0)
q <- unique(q)
q <- subset(q, graph != "mammal")

# Fix the order of the graphs; a graph name that is not listed here
# becomes NA in q$graph.
real_graph_levels <- c(
  "acm", "csphd", "mesh", "noun", "tetrapoda", "verbf",
  "astroph", "condmat", "grqc", "hepph", "diseasome", "facebook",
  "followers-2009", "yeast",
  paste0("connectome/", c(
    "Cat1", "Cat2", "Cat3", "CElegans", "Drosophila1", "Drosophila2",
    "Human1", "Human2", "Human6", "Human7", "Human8",
    "Macaque1", "Macaque2", "Macaque3", "Macaque4",
    "Mouse2", "Mouse3", "Rat1", "Rat2", "Rat3", "ZebraFinch2"
  ))
)
q$graph <- factor(q$graph, levels = real_graph_levels)

# Rows whose method name is the placeholder "X" are not reported.
q <- subset(q, name != "X")

library(tidyverse)

# Scatter the column `measure` against the column `anim_var` for every row
# of `df`, with point shape and colour both keyed on df$name, and write the
# figure to `filename` as a 10x7 inch PDF.
#
# filename      path of the PDF to create
# df            data frame with a `name` column plus the columns named by
#               `measure` and `anim_var`
# measure       name (string) of the column shown on the y axis
# anim_var      name (string) of the column shown on the x axis
# shape_vector  point shapes passed to scale_shape_manual(values = ...)
# color_vector  point colours passed to scale_color_manual(values = ...)
# title         y-axis label (the x-axis label is always "graph")
#
# Returns the value of dev.off().
draw_best_rankings <- function(filename, df, measure, anim_var, shape_vector, color_vector, title) {
  library(ggplot2)
  pdf(filename, 10,7)
  # If building or printing the plot fails, still close the PDF device so
  # it does not stay open and capture later plots.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)
  # .data[[...]] selects a column by its string name; it replaces the
  # deprecated aes_string().
  g <- ggplot(df, aes(x = .data[[anim_var]], y = .data[[measure]])) +
    geom_point(aes(shape=name, color=name), size=3) +
    scale_shape_manual(values=shape_vector) +
    scale_color_manual(values=color_vector) +
    xlab("graph") + ylab(title) +
    scale_x_discrete(guide = guide_axis(angle = 90)) + theme_minimal(base_size = 15)
  print(g)
  device_open <- FALSE
  dev.off()
}

# Rank the rows of `df_orig` by `measure` separately within each graph and
# append the result to `df`.
#
# df          data frame the rankings are appended to (may be empty)
# df_orig     data frame with columns `name`, `graph` and the column named
#             by `measure`
# graph_list  graph names to process, in output order
# measure     name (string) of the column to rank
#
# Returns `df` followed by one row per matching row of `df_orig`, with
# columns name, graph and rank. Ranks are ascending (rank 1 is the smallest
# value), ties share their average rank, and NA values keep rank NA.
prepare_ranking <- function(df, df_orig, graph_list, measure) {
  # Build every per-graph piece first and bind them in a single call
  # instead of growing `df` with rbind() on each iteration.
  per_graph <- lapply(graph_list, function(graph1) {
    a <- df_orig[which(df_orig$graph == graph1), , drop = FALSE]
    data.frame(
      name = a$name,
      graph = a$graph,
      rank = rank(a[[measure]], na.last = "keep"),
      stringsAsFactors = FALSE
    )
  })
  do.call(rbind, c(list(df), per_graph))
}


# Display table for the methods: each row is
# (method name, point shape code, point colour). Everything is stored as
# character; the shape codes are converted to numbers below.
tab <- matrix(
  c(
    "BFKL",           "0",  "black",
    "BFKL+DHRG",      "12", "black",
    "Penalty",        "7",  "black",
    "Lorentz2D",      "5",  "black",
    "Lorentz2D+DHRG", "9",  "black",
    "Lorentz3D",      "5",  "forestgreen",
    "Poincare2D",     "2",  "black",
    "Poincare3D",     "2",  "forestgreen",
    "Poincare5D",     "2",  "blue",
    "Mercator fast",  "1",  "black",
    "Mercator full",  "13", "black",
    "d-Mercator",     "13", "forestgreen",
    "Anneal2D",       "6",  "black",
    "Anneal3D",       "6",  "forestgreen",
    "LTiling",        "3",  "black",
    "TreeRep",        "4",  "black",
    "Euclidean200D",  "3",  "blue",
    "Euclidean50D",   "4",  "blue"
  ),
  ncol = 3, byrow = TRUE
)

# Order the methods as in the table; a name missing from it becomes NA.
q$name <- factor(q$name, levels = tab[, 1])
#tab <- tab[order(tab[,1]),]

# Shape and colour vectors in table order, for the manual ggplot scales.
shapes <- as.numeric(tab[, 2])
colors <- tab[, 3]

# Graphs plotted in the main figures ...
graphs_full <- c("acm", "csphd", "mesh", "noun", "tetrapoda", "verbf",
                 "astroph", "condmat", "grqc", "hepph", "diseasome", "facebook",
                 "followers-2009", "yeast")

# ... and the connectome graphs, plotted separately.
graphs_conn <- paste0("connectome/", c(
  "Cat1", "Cat2", "Cat3", "CElegans", "Drosophila1", "Drosophila2",
  "Human1", "Human2", "Human6", "Human7", "Human8",
  "Macaque1", "Macaque2", "Macaque3", "Macaque4",
  "Mouse2", "Mouse3", "Rat1", "Rat2", "Rat3", "ZebraFinch2"
))

# Methods to leave out of the "2" variants of the plots (none for now).
#to_exclude <- c("Euclidean200D", "Euclidean50D", "Poincare5D", "TreeRep", "LTiling")
to_exclude <- c()

# Method table, shapes and colours restricted to the methods that remain.
tab2 <- tab[!(tab[, 1] %in% to_exclude), ]
shapes2 <- as.numeric(tab2[, 2])
colors2 <- tab2[, 3]

# plots for MAP

# raw values, main graphs
aux <- q[q$graph %in% graphs_full, ]
draw_best_rankings(filename = "~/real-world_MAP.pdf", df = aux,
                   measure = "mAP", anim_var = "graph",
                   shape_vector = shapes, color_vector = colors,
                   title = "MAP")

# per-graph ranks, main graphs
ranking <- prepare_ranking(data.frame(), q, graphs_full, "mAP")
draw_best_rankings(filename = "~/real-world_MAP_ranking.pdf", df = ranking,
                   measure = "rank", anim_var = "graph",
                   shape_vector = shapes, color_vector = colors,
                   title = "ranks of MAP")

# raw values, connectomes (the "connectome/" prefix is stripped for display)
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_MAP_conn.pdf", df = aux,
                   measure = "mAP", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "MAP")

# per-graph ranks, connectomes
aux <- q[!(q$name %in% to_exclude), ]
ranking <- prepare_ranking(data.frame(), aux, graphs_conn, "mAP")
ranking$graph <- gsub("connectome/", "", ranking$graph)
draw_best_rankings(filename = "~/real-world_MAP_ranking_conn.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of MAP")

# for the needs of short version: a selection of graphs in one figure
graphs_paper <- c("acm", "csphd", "mesh", "noun", "tetrapoda", "verbf",
                  "astroph", "condmat", "grqc", "hepph", "diseasome",
                  "facebook", "yeast",
                  "connectome/CElegans", "connectome/Human1",
                  "connectome/Drosophila1", "connectome/Mouse3")
aux <- q[q$graph %in% graphs_paper, ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_MAP_short.pdf", df = aux,
                   measure = "mAP", anim_var = "graph",
                   shape_vector = shapes, color_vector = colors,
                   title = "MAP")

# plots for MR (shown as the inverse mean rank, column invmr)

# raw values, main graphs
aux <- q[q$graph %in% graphs_full, ]
draw_best_rankings(filename = "~/real-world_MR.pdf", df = aux,
                   measure = "invmr", anim_var = "graph",
                   shape_vector = shapes, color_vector = colors,
                   title = "IMR")

# per-graph ranks, main graphs
ranking <- prepare_ranking(data.frame(), q, graphs_full, "invmr")
draw_best_rankings(filename = "~/real-world_MR_ranking.pdf", df = ranking,
                   measure = "rank", anim_var = "graph",
                   shape_vector = shapes, color_vector = colors,
                   title = "ranks of IMR")

# raw values, connectomes (the "connectome/" prefix is stripped for display)
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_MR_conn.pdf", df = aux,
                   measure = "invmr", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "IMR")

# per-graph ranks, connectomes
aux <- q[!(q$name %in% to_exclude), ]
ranking <- prepare_ranking(data.frame(), aux, graphs_conn, "invmr")
ranking$graph <- gsub("connectome/", "", ranking$graph)
draw_best_rankings(filename = "~/real-world_MR_ranking_conn.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of IMR")


# plots for success

# The graphs listed in `hierarchies` are left out of the success plots.
hierarchies <- c("noun", "verbf", "acm", "mesh", "tetrapoda", "csphd")

# raw values: every graph that is neither a hierarchy nor a connectome
aux <- q[!(q$graph %in% hierarchies) & !(q$graph %in% graphs_conn) &
           !(q$name %in% to_exclude), ]
draw_best_rankings(filename = "~/real-world_success.pdf", df = aux,
                   measure = "success", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "Greedy success rate")

# per-graph ranks on the same rows
ranking <- prepare_ranking(data.frame(), aux, graphs_full, "success")
draw_best_rankings(filename = "~/real-world_success_ranking.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of greedy success")

# raw values, connectomes (the "connectome/" prefix is stripped for display)
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_success_conn.pdf", df = aux,
                   measure = "success", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "Greedy success rate")

# per-graph ranks, connectomes
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
ranking <- prepare_ranking(data.frame(), aux, graphs_conn, "success")
ranking$graph <- gsub("connectome/", "", ranking$graph)
draw_best_rankings(filename = "~/real-world_success_ranking_conn.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of greedy success")

# for paper: the short-version selection of graphs, hierarchies removed
aux <- q[q$graph %in% graphs_paper & !(q$graph %in% hierarchies) &
           !(q$name %in% to_exclude), ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_success_short.pdf", df = aux,
                   measure = "success", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "Greedy success rate")

# plots for stretch (shown as the inverse stretch, column invstr)

# raw values: every graph that is neither a hierarchy nor a connectome
aux <- q[!(q$graph %in% hierarchies) & !(q$graph %in% graphs_conn) &
           !(q$name %in% to_exclude), ]
draw_best_rankings(filename = "~/real-world_stretch.pdf", df = aux,
                   measure = "invstr", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "Stretch")

# per-graph ranks on the same rows
ranking <- prepare_ranking(data.frame(), aux, graphs_full, "invstr")
draw_best_rankings(filename = "~/real-world_stretch_ranking.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of stretch")

# raw values, connectomes (the "connectome/" prefix is stripped for display)
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings(filename = "~/real-world_stretch_conn.pdf", df = aux,
                   measure = "invstr", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "Stretch")

# per-graph ranks, connectomes
aux <- q[q$graph %in% graphs_conn & !(q$name %in% to_exclude), ]
ranking <- prepare_ranking(data.frame(), aux, graphs_conn, "invstr")
ranking$graph <- gsub("connectome/", "", ranking$graph)
draw_best_rankings(filename = "~/real-world_stretch_ranking_conn.pdf",
                   df = ranking, measure = "rank", anim_var = "graph",
                   shape_vector = shapes2, color_vector = colors2,
                   title = "ranks of stretch")


# plots for control
# Five methods are left out of the control plots.
to_exclude <- c("Euclidean200D", "Euclidean50D", "Poincare5D", "TreeRep", "LTiling")

# Rebuild the restricted shape/colour vectors for the new exclusion list.
# The manual scales receive unnamed value vectors, which ggplot2 assigns to
# the methods present in the data in order; with the vectors built for the
# earlier (empty) exclusion list, the entries of the excluded methods would
# be handed to the methods that follow them.
tab2 <- tab[!(tab[,1] %in% to_exclude),]
shapes2 <- as.numeric(t(tab2) [2,])
colors2 <- t(tab2) [3,]

hierarchies <- c("noun", "verbf", "acm", "mesh", "tetrapoda", "csphd")

# raw values: every graph that is neither a hierarchy nor a connectome
aux <- subset(q, !( graph  %in% hierarchies) & !(graph %in% graphs_conn) & !(name %in% to_exclude))

draw_best_rankings("~/real-world_control.pdf", aux, "control", "graph", shapes2, colors2, "Control")

# per-graph ranks on the same rows
ranking <- data.frame()
ranking <- prepare_ranking(ranking, aux, graphs_full, "control")
draw_best_rankings("~/real-world_control_ranking.pdf", ranking, "rank", "graph", shapes2, colors2, "ranks of Control")

# raw values, connectomes (the "connectome/" prefix is stripped for display)
aux <- subset(q, graph %in% graphs_conn)
aux <- subset(aux, !(name %in% to_exclude))
aux$graph <- gsub("connectome/", "", aux$graph)
draw_best_rankings("~/real-world_control_conn.pdf", aux, "control", "graph", shapes2, colors2, "Control")

# per-graph ranks, connectomes
ranking <- data.frame()
aux <- subset(q, !(name %in% to_exclude))
ranking <- prepare_ranking(ranking, aux, graphs_conn, "control")
ranking$graph <- gsub("connectome/", "", ranking$graph)
draw_best_rankings("~/real-world_control_ranking_conn.pdf", ranking, "rank", "graph", shapes2, colors2, "ranks of Control")


