# Load the benchmark results: semicolon-separated, with a header row,
# strings kept as character (as.is). TRUE is spelled out because T is an
# ordinary, reassignable variable.
m <- read.table("~/transfer/statistical-data.csv", sep=";", as.is=TRUE, header=TRUE)
table(m$graph)

# For convenient visualizations the quality measures should be numeric and
# oriented so that higher is better.

m$success <- as.numeric(m$success)
m$mAP <- as.numeric(m$mAP)
m$meanrank <- as.numeric(m$meanrank)
m$stretch <- as.numeric(m$stretch)

# Inverted copies of the two measures where smaller raw values are better:
# invmr = 1 / meanrank, invstr = 1 / stretch.
m$invmr <- 1/m$meanrank
m$invstr <- 1/m$stretch

# Everything below works on the simulated graphs only.
sim <- subset(m, graph=="simulated")

table(sim$method)

# BFKL has to be compared against Lorentz (discrete == 0).
# id_merge identifies one simulated graph (ssize, sid, stemp) so that the
# results of different methods on the same graph can be paired.

sim$id_merge <- paste(sim$ssize, sim$sid, sim$stemp)


# Overlay the densities of raw running times for one graph size:
# BFKL in blue, Lorentz (dim == 2) in the default colour.
#   n    - graph size; rows with ssize == n are plotted
#   text - plot title
# Reads the global data frame `sim`.
# NOTE: this won't do much, and a later definition of dens_plo in this
# file replaces it.
dens_plo <- function (n, text) {
  lor <- subset(sim, discrete==0 & method == "lorentz" & dim==2)
  bfkl <- subset(sim, discrete==0 & method == "bfkl")
  # Filter with explicit indexing rather than subset(): inside subset() the
  # name `n` is looked up in the data frame first, so a column called `n`
  # would silently replace the function argument in `ssize == n`.
  bfkl_time <- bfkl$time[which(bfkl$ssize == n)]
  lor_time <- lor$time[which(lor$ssize == n)]
  plot(density(bfkl_time), main=text, col="blue", xlab=NA)
  lines(density(lor_time))
  #abline(v = 1, col="red", lwd=3, lty=2)
}

# Density of the paired running-time difference (Lorentz minus BFKL) for
# graphs of one size.
#   n    - graph size; pairs whose Lorentz row has ssize == n are plotted
#   text - plot title
# Reads the global data frame `sim`; pairs are matched on id_merge.
dens_plo <- function (n, text) {
  continuous <- sim$discrete == 0
  lorentz_rows <- sim[which(continuous & sim$method == "lorentz" & sim$dim == 2), ]
  bfkl_rows <- sim[which(continuous & sim$method == "bfkl"), ]

  # One row per graph; shared columns get the suffixes "lor" and "bfkl".
  paired <- merge(lorentz_rows, bfkl_rows, by = "id_merge", suffixes = c("lor", "bfkl"))
  time_gap <- paired$timelor - paired$timebfkl

  wanted <- which(paired$ssizelor == n)
  plot(density(time_gap[wanted]), main = text, col = "blue", xlab = NA)
}

# One time-difference density per simulated graph size, titled "n=<size>".
for (graph_size in c(100, 200, 500, 1000, 2000)) {
  dens_plo(graph_size, paste0("n=", graph_size))
}

# Continuous (discrete == 0) runs of the two methods being compared.
lor <- subset(sim, discrete==0 & method == "lorentz" & dim==2)
bfkl <- subset(sim, discrete==0 & method == "bfkl")

# Attach the separately measured BFKL timings (column bfkltime) to the BFKL
# rows as precise_time, matched on the same (ssize, sid, stemp) key.
# TRUE is spelled out because T is an ordinary, reassignable variable.
n <- read.table("../tables//precise-times.csv", sep=";", as.is=TRUE, header=TRUE)
n$id_merge <- paste(n$ssize, n$sid, n$stemp)
n <- data.frame(n$id_merge, n$bfkltime)
colnames(n) <- c("id_merge", "precise_time")
# merge() is an inner join: BFKL rows without a precise timing are dropped.
bfkl <- merge(bfkl, n, by="id_merge")

# One row per graph with both methods side by side; columns present in both
# inputs get the suffix "lor" or "bfkl".
x <- merge(lor, bfkl, by="id_merge", suffixes=c("lor", "bfkl"))

# Per-graph comparison columns, BFKL versus Lorentz.
# ratio_* columns are BFKL / Lorentz; diff_* columns are Lorentz - BFKL,
# except for mean rank and stretch, where the difference is taken on the
# -log scale. Only the running-time ratio is expressed in percent.

x$ratio <- with(x, 100 * (timebfkl / timelor))
x$diff <- with(x, timelor - timebfkl)
summary(x$ratio)

x$ratio_map <- with(x, mAPbfkl / mAPlor)
x$diff_map <- with(x, mAPlor - mAPbfkl)
summary(x$ratio_map)

x$ratio_mr <- with(x, meanrankbfkl / meanranklor)
x$diff_mr <- with(x, log(meanrankbfkl) - log(meanranklor))

x$ratio_greedy <- with(x, successbfkl / successlor)
x$diff_greedy <- with(x, successlor - successbfkl)

x$diff_eff <- with(x, effectlor - effectbfkl)

x$ratio_stretch <- with(x, stretchbfkl / stretchlor)
x$diff_stretch <- with(x, log(stretchbfkl) - log(stretchlor))

x$diff_control <- with(x, controllor - controlbfkl)
x$ratio_control <- with(x, controlbfkl / controllor)
summary(x$ratio_control)


# Densities of the running-time ratio (x$ratio) per graph size, one plot for
# stempbfkl == 1 and one for stempbfkl == 7. Sizes 200 and 100 are not drawn.
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "red", "purple")

  # Density of x$ratio over the pairs of one size and temperature code.
  ratio_density <- function(size, temp) {
    density(x$ratio[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One complete plot: largest size first, the others overlaid, then legend.
  draw <- function(temp, y_top, legend_y) {
    plot(ratio_density(sizes[1], temp), main = "comparison of time [s]",
         col = inks[1], xlab = NA, ylim = c(0, y_top), xlim = c(0, 2))
    for (k in seq_along(sizes)[-1]) {
      lines(ratio_density(sizes[k], temp), col = inks[k])
    }
    legend(1.25, legend_y, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  draw(1, 8, 6)
  draw(7, 3.5, 2.5)
}))

# comparison of map in division by graph size
#
# x$ratio_map is stored as a plain ratio (mAPbfkl / mAPlor, i.e. values
# around 1), while the axis limits, reference line and legend coordinates
# used here are on a percent scale (50..130, v = 100). The ratio is therefore
# converted to percent for plotting, so that the curves fall inside the
# plotting window and the reference line marks "equal mAP" in both plots.

plot(density(100 * subset(x, ssizelor == 2000 & stempbfkl == 1)$ratio_map), main="comparison of map", col="blue", xlab=NA)
lines(density(100 * subset(x, ssizelor == 1000 & stempbfkl == 1)$ratio_map), col="red")
lines(density(100 * subset(x, ssizelor == 500 & stempbfkl == 1)$ratio_map), col="purple")
#lines(density(100 * subset(x, ssizelor == 200 & stempbfkl == 1)$ratio_map), col="yellow")
#lines(density(100 * subset(x, ssizelor == 100 & stempbfkl == 1)$ratio_map), col="green")
abline(v=100)
# This plot has no fixed axis limits, so the legend is placed by keyword
# instead of at hard-coded coordinates that may lie outside the plot.
legend("topright", legend=c("n=2000", "n=1000", "n=500"),
       col=c("blue", "red", "purple"), lty=1)

plot(density(100 * subset(x, ssizelor == 2000 & stempbfkl == 7)$ratio_map), main="comparison of map", col="blue", xlab=NA, ylim=c(0,0.04), xlim=c(50,130))
lines(density(100 * subset(x, ssizelor == 1000 & stempbfkl == 7)$ratio_map), col="red")
lines(density(100 * subset(x, ssizelor == 500 & stempbfkl == 7)$ratio_map), col="purple")
#lines(density(100 * subset(x, ssizelor == 200 & stempbfkl == 7)$ratio_map), col="yellow")
#lines(density(100 * subset(x, ssizelor == 100 & stempbfkl == 7)$ratio_map), col="green")
abline(v=100)
legend(105, 0.04, legend=c("n=2000", "n=1000", "n=500"),
       col=c("blue", "red", "purple"), lty=1)

# Difference in mAP (x$diff_map = mAPlor - mAPbfkl): a three-panel PDF, one
# panel per temperature code (1, 4, 7, titled T=0.1, 0.4, 0.7), one curve per
# graph size. Sizes 200 and 100 are not drawn.

pdf("~/densities_MAP.pdf", 30,7)
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "black", "red")

  # Density of diff_map over the pairs of one size and temperature code.
  curve_for <- function(size, temp) {
    density(x$diff_map[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One panel; panel-specific plot() settings are passed through `...`.
  panel <- function(temp, ...) {
    plot(curve_for(sizes[1], temp), col = inks[1], xlab = NA, ylab = NA,
         cex.main = 3.5, cex.axis = 2, ...)
    for (k in seq_along(sizes)[-1]) {
      lines(curve_for(sizes[k], temp), col = inks[k])
    }
    abline(v = 0)
    legend("topright", cex = 2.5, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  par(mfrow = c(1, 3))
  panel(1, main = "comparison of MAP, T=0.1", xlim = c(-0.15, 0.15))
  panel(4, main = "comparison of MAP, T=0.4", xlim = c(-0.15, 0.20))
  panel(7, main = "comparison of MAP, T=0.7", xlim = c(-0.1, 0.2), ylim = c(0, 23))
}))
dev.off()


# individual plots
# For stempbfkl == 1 and each graph size: density of the Lorentz mAP (blue)
# with the BFKL mAP (black) overlaid. No main= is given, so each plot title
# is the deparsed density() call.

plot(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$mAPbfkl), col="black") 

plot(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$mAPbfkl), col="black")

plot(density(subset(x, ssizelor == 500 & stempbfkl == 1)$mAPlor), col="blue")
lines(density(subset(x, ssizelor == 500 & stempbfkl == 1)$mAPbfkl), col="black") 

# The two smallest sizes are disabled.
#plot(density(subset(x, ssizelor == 200 & stempbfkl == 1)$mAPlor), col="blue")
#lines(density(subset(x, ssizelor == 200 & stempbfkl == 1)$mAPbfkl), col="black") 

#plot(density(subset(x, ssizelor == 100 & stempbfkl == 1)$mAPlor), col="blue")
#lines(density(subset(x, ssizelor == 100 & stempbfkl == 1)$mAPbfkl), col="black") 

# Number of paired rows per method and graph size, for each side of the merge.
table(x$methodbfkl, x$ssizebfkl)
table(x$methodlor, x$ssizelor)

# Same kind of density taken straight from the unmerged table m
# (Lorentz, dim == 2, discrete == 0, ssize == 100, stemp == 1).
plot(density(subset(m, ssize == 100 & stemp == 1 & method=="lorentz" & dim==2 & discrete==0)$mAP), col="blue")

# Difference in -log(mean rank) (x$diff_mr): a three-panel PDF, one panel per
# temperature code (1, 4, 7, titled T=0.1, 0.4, 0.7), one curve per graph
# size. Sizes 200 and 100 are not drawn.

pdf("~/densities_MR.pdf", 30,7)
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "black", "red")

  # Density of diff_mr over the pairs of one size and temperature code.
  curve_for <- function(size, temp) {
    density(x$diff_mr[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One panel; only the title and the y-range differ between panels.
  panel <- function(temp, title, y_top) {
    plot(curve_for(sizes[1], temp), main = title, col = inks[1], xlab = NA,
         ylim = c(0, y_top))
    for (k in seq_along(sizes)[-1]) {
      lines(curve_for(sizes[k], temp), col = inks[k])
    }
    abline(v = 0)
    legend("topright", cex = 2.5, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  par(mfrow = c(1, 3))
  panel(1, "comparison of -log(MR), T=0.1", 1.1)
  panel(4, "comparison of -log(MR), T=0.4", 2.0)
  panel(7, "comparison of -log(MR), T=0.7", 3.2)
}))
dev.off()


# Difference in greedy success (x$diff_greedy = successlor - successbfkl):
# a three-panel PDF, one panel per temperature code (1, 4, 7, titled T=0.1,
# 0.4, 0.7), one curve per graph size. Sizes 200 and 100 are not drawn.

pdf("~/densities_greedy.pdf", 30,7)
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "black", "red")

  # Density of diff_greedy over the pairs of one size and temperature code.
  curve_for <- function(size, temp) {
    density(x$diff_greedy[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One panel; only the title and the x-range differ between panels.
  panel <- function(temp, title, x_range) {
    plot(curve_for(sizes[1], temp), main = title, col = inks[1], xlab = NA,
         ylab = NA, xlim = x_range, ylim = c(0, 13), cex.main = 3.5,
         cex.axis = 2)
    for (k in seq_along(sizes)[-1]) {
      lines(curve_for(sizes[k], temp), col = inks[k])
    }
    abline(v = 0)
    legend("topright", cex = 2.5, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  par(mfrow = c(1, 3))
  panel(1, "comparison of greedy success, T=0.1", c(-0.25, 0.2))
  panel(4, "comparison of greedy success, T=0.4", c(-0.1, 0.2))
  panel(7, "comparison of greedy success, T=0.7", c(-0.1, 0.2))
}))
dev.off()

# Difference in -log(stretch) (x$diff_stretch): a three-panel PDF, one panel
# per temperature code (1, 4, 7, titled T=0.1, 0.4, 0.7), one curve per graph
# size. Sizes 200 and 100 are not drawn.

pdf("~/densities_stretch.pdf", 30,7)
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "black", "red")

  # Density of diff_stretch over the pairs of one size and temperature code.
  curve_for <- function(size, temp) {
    density(x$diff_stretch[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One panel; panel-specific plot() settings are passed through `...`.
  panel <- function(temp, ...) {
    plot(curve_for(sizes[1], temp), col = inks[1], xlab = NA, ...)
    for (k in seq_along(sizes)[-1]) {
      lines(curve_for(sizes[k], temp), col = inks[k])
    }
    abline(v = 0)
    legend("topright", cex = 2.5, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  par(mfrow = c(1, 3))
  panel(1, main = "comparison of -log(stretch), T=0.1", ylim = c(0, 13),
        xlim = c(-0.15, 0.15))
  panel(4, main = "comparison of -log(stretch), T=0.4", ylim = c(0, 15))
  panel(7, main = "comparison of -log(stretch), T=0.7", ylim = c(0, 15))
}))
dev.off()

# Difference in control (x$diff_control = controllor - controlbfkl): a
# three-panel PDF, one panel per temperature code (1, 4, 7, titled T=0.1,
# 0.4, 0.7), one curve per graph size. Sizes 200 and 100 are not drawn.
pdf("~/densities_control.pdf", 30,7)
invisible(local({
  sizes <- c(2000, 1000, 500)
  inks <- c("blue", "black", "red")

  # Density of diff_control over the pairs of one size and temperature code.
  curve_for <- function(size, temp) {
    density(x$diff_control[which(x$ssizelor == size & x$stempbfkl == temp)])
  }

  # One panel; panel-specific plot() settings are passed through `...`.
  panel <- function(temp, ...) {
    plot(curve_for(sizes[1], temp), col = inks[1], xlab = NA, ...)
    for (k in seq_along(sizes)[-1]) {
      lines(curve_for(sizes[k], temp), col = inks[k])
    }
    abline(v = 0)
    legend("topright", cex = 2.5, legend = paste0("n=", sizes), col = inks, lty = 1)
  }

  par(mfrow = c(1, 3))
  panel(1, main = "comparison of control, T=0.1", xlim = c(-0.15, 0.15))
  panel(4, main = "comparison of control, T=0.4", xlim = c(-0.15, 0.20))
  panel(7, main = "comparison of control, T=0.7", xlim = c(-0.1, 0.2),
        ylim = c(0, 45))
}))

dev.off()

###########

# regressions

# Fixed seed for the steps that follow.
set.seed(17052023)

# Binary outcomes as factors: 1 where the matching diff_* column is
# negative, 0 otherwise.
x$bfklwins_map <- factor(as.numeric(x$diff_map < 0))
x$bfklwins_mr <- factor(as.numeric(x$diff_mr < 0))
x$bfklwins_greedy <- factor(as.numeric(x$diff_greedy < 0))
x$bfklwins_stretch <- factor(as.numeric(x$diff_stretch < 0))
x$bfklwins_control <- factor(as.numeric(x$diff_control < 0))
x$bfklwins_eff <- factor(as.numeric(x$diff_eff < 0))


# Outcome counts per temperature code, first for mAP ...
table(x$bfklwins_map[which(x$stempbfkl == 1)])
table(x$bfklwins_map[which(x$stempbfkl == 4)])
table(x$bfklwins_map[which(x$stempbfkl == 7)])

# ... then for mean rank.
table(x$bfklwins_mr[which(x$stempbfkl == 1)])
table(x$bfklwins_mr[which(x$stempbfkl == 4)])
table(x$bfklwins_mr[which(x$stempbfkl == 7)])

# Logistic regressions for bfklwins_map, restricted to ssizebfkl >= 500.
xlog <- subset(x, ssizebfkl >=500)

library(caret)
# 20-fold cross-validation; this object is reused by the train() calls of
# the later sections.
train_control <- trainControl(method='cv', number=20)

# Cross-validated fit with nbfkl and mbfkl as numeric predictors.
kfold_train <- train(bfklwins_map~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog,
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

# Cross-validated fit with graph size as a factor instead.
kfold_train <- train(bfklwins_map~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog,
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

# Plain glm fits of the same two specifications. The nbfkl/mbfkl model gets
# its own name so that it is still available for the likelihood-ratio test
# below; `reg` ends up holding the size-factor model, as before.
reg_nm <- glm(bfklwins_map~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
summary(reg_nm)

reg <- glm(bfklwins_map~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

library(lmtest)
# Joint test of nbfkl and mbfkl. It has to start from the model that
# contains these terms; applied to `reg` (which has neither), the update
# formula would remove nothing and the test would compare a model with
# itself.
lrtest(reg_nm, . ~ . - nbfkl - mbfkl)

# for mr
# Same analysis with bfklwins_mr as the outcome. The nbfkl/mbfkl
# specification is disabled; only the model with temperature and graph size
# as factors is run. train_control is the object created in the mAP section.
#kfold_train <- train(bfklwins_mr~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                     method='glm', family=binomial, trControl=train_control)

#print(kfold_train)

# Cross-validated logistic regression.
kfold_train <- train(bfklwins_mr~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_mr~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula; overwrites `reg` from the mAP section.
reg <- glm(bfklwins_mr~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# for greedy
# Same analysis with bfklwins_greedy as the outcome.
# caret is attached and train_control rebuilt here with the same settings
# as in the mAP section (20-fold CV).
library(caret)
train_control <- trainControl(method='cv', number=20)

# Disabled alternative specification with nbfkl and mbfkl.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)

# Cross-validated logistic regression on temperature and graph size factors.
kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula; overwrites `reg`.
reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# for stretch
# Same analysis with bfklwins_stretch as the outcome.
# caret is attached and train_control rebuilt here with the same settings
# as in the mAP section (20-fold CV).
library(caret)
train_control <- trainControl(method='cv', number=20)

# NOTE: the disabled code in this section was copied from the greedy section
# and still names bfklwins_greedy; replace it with bfklwins_stretch before
# re-enabling.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)

# Cross-validated logistic regression on temperature and graph size factors.
kfold_train <- train(bfklwins_stretch~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula; overwrites `reg`.
reg <- glm(bfklwins_stretch~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# for control
# Same analysis with bfklwins_control as the outcome, with one extra
# predictor: radiusratio.
# caret is attached and train_control rebuilt here with the same settings
# as in the mAP section (20-fold CV).
library(caret)
train_control <- trainControl(method='cv', number=20)

# NOTE: the disabled code in this section was copied from the greedy section
# and still names bfklwins_greedy; replace it with bfklwins_control before
# re-enabling.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)
# Ratio of the BFKL radius column to the Lorentz radius column.
xlog$radiusratio <- xlog$radiusbfkl/xlog$radiuslor
# Cross-validated logistic regression.
kfold_train <- train(bfklwins_control~as.factor(stempbfkl) + as.factor(ssizebfkl) +radiusratio, data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula; overwrites `reg`.
reg <- glm(bfklwins_control~as.factor(stempbfkl) + as.factor(ssizebfkl) + radiusratio , data=xlog, family=binomial)
summary(reg)

# for eff
# Same analysis with bfklwins_eff as the outcome (the indicator built from
# diff_eff, i.e. from the effectlor/effectbfkl columns).
# caret is attached and train_control rebuilt here with the same settings
# as in the mAP section (20-fold CV).
library(caret)
train_control <- trainControl(method='cv', number=20)

# NOTE: the disabled code in this section was copied from the greedy section
# and still names bfklwins_greedy; replace it with bfklwins_eff before
# re-enabling.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)
# radiusratio is recomputed here but not used by either model in this section.
xlog$radiusratio <- xlog$radiusbfkl/xlog$radiuslor
# Cross-validated logistic regression on temperature and graph size factors.
kfold_train <- train(bfklwins_eff~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula; overwrites `reg`.
reg <- glm(bfklwins_eff~as.factor(stempbfkl) + as.factor(ssizebfkl) , data=xlog, family=binomial)
summary(reg)

# scatterplot for time vs quality

# Attach ggplot2 before any device is opened, so a missing package cannot
# leave a half-open PDF behind.
library(ggplot2)

# How many times longer Lorentz takes than BFKL, using the precise BFKL
# timings (precise_time) instead of timebfkl.
xlog$timeloss <- (xlog$timelor/xlog$precise_time)

# Pairs where BFKL does not win on mAP (bfklwins_map == 0), with the
# percentage gain in mAP of Lorentz over BFKL.
lorbet_map <- subset(xlog, bfklwins_map==0)
lorbet_map$pergain <- 100*((lorbet_map$mAPlor/lorbet_map$mAPbfkl)-1)

# One scatter plot per temperature code:
#   x = percentage gain in quality, y = how many times longer it takes.
# Each ggplot object is print()ed explicitly: a bare ggplot expression is
# drawn only through top-level auto-printing, which does not happen when the
# script is run with source(), and the PDF would then be written without a
# page. Each plot is followed by Kendall's rank correlation between gain and
# time loss.

pdf("~/gain_MAP01.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==1), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.1", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==1)$pergain, subset(lorbet_map,stempbfkl==1)$timeloss, method="kendall")

pdf("~/gain_MAP04.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==4), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.4", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==4)$pergain, subset(lorbet_map,stempbfkl==4)$timeloss, method="kendall")

pdf("~/gain_MAP07.pdf", 10,7)
print(
  ggplot(subset(lorbet_map,stempbfkl==7), aes(x=pergain, y=timeloss)) + geom_point() + theme_minimal(base_size = 40) +
    labs(title="T=0.7", x="", y = "")
)
dev.off()

cor.test(subset(lorbet_map,stempbfkl==7)$pergain, subset(lorbet_map,stempbfkl==7)$timeloss, method="kendall")


# analysis for +dhrg
# For each base embedding, compare its mAP with the "+DHRG" variant of the
# same embedding on the same graph.
library(reshape2)

# Make `graph` a per-graph identifier (graph type, size, temperature, id) so
# it can serve as the row key when reshaping.
# NOTE: this overwrites sim$graph; re-running these two lines pastes the
# suffix on again.
sim$graph_name <- paste(sim$graph, sim$ssize, sim$stemp, sim$sid)
sim$graph <- sim$graph_name

# The pipe and data.table conversion used previously are avoided because no
# package providing `%>%` or as.data.table() is attached in this script;
# ggplot() takes the data frame directly and reshape2::dcast() works on
# plain data frames. After the loop, aux, p and wide hold the HypCLOVE
# results.
for (base_name in c("Lorentz2D", "Poincare2D", "HypCLOVE")) {
  dhrg_name <- paste0(base_name, "+DHRG")
  aux <- sim[sim$name %in% c(base_name, dhrg_name), ]

  # Overlaid histograms of mAP for the two variants.
  p <- ggplot(aux, aes(x=mAP, fill=name)) +
    geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080")) +
    theme_minimal() +
    labs(fill="")
  print(p)

  # One row per graph, one mAP column per variant; diff = base - (+DHRG).
  wide <- reshape2::dcast(aux, graph ~ name, value.var="mAP")
  wide$diff <- wide[[base_name]] - wide[[dhrg_name]]

  plot(density(wide$diff))
}




